diff --git a/.github/workflows/merge-linux.yml b/.github/workflows/merge-linux.yml index 733df61527..d68519f379 100644 --- a/.github/workflows/merge-linux.yml +++ b/.github/workflows/merge-linux.yml @@ -64,7 +64,7 @@ jobs: runs-on: ubuntu-latest env: KERNEL_URL: https://cdn.kernel.org/pub/linux/kernel/v5.x/ - KERNEL_VERSION: linux-5.15.153 + KERNEL_VERSION: linux-5.15.161 KERNEL_BRANCH: linux-5.15 steps: - uses: actions/checkout@v3 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index eecbd16033..23e0537f6e 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -517,6 +517,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling /sys/devices/system/cpu/vulnerabilities/retbleed /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/spectre_v1 diff --git a/Documentation/admin-guide/filesystem-monitoring.rst b/Documentation/admin-guide/filesystem-monitoring.rst new file mode 100644 index 0000000000..5a3c84e600 --- /dev/null +++ b/Documentation/admin-guide/filesystem-monitoring.rst @@ -0,0 +1,74 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================================== +File system Monitoring with fanotify +==================================== + +File system Error Reporting +=========================== + +Fanotify supports the FAN_FS_ERROR event type for file system-wide error +reporting. It is meant to be used by file system health monitoring +daemons, which listen for these events and take actions (notify +sysadmin, start recovery) when a file system problem is detected. 
+ +By design, a FAN_FS_ERROR notification exposes sufficient information +for a monitoring tool to know a problem in the file system has happened. +It doesn't necessarily provide a user space application with semantics +to verify an IO operation was successfully executed. That is out of +scope for this feature. Instead, it is only meant as a framework for +early file system problem detection and reporting recovery tools. + +When a file system operation fails, it is common for dozens of kernel +errors to cascade after the initial failure, hiding the original failure +log, which is usually the most useful debug data to troubleshoot the +problem. For this reason, FAN_FS_ERROR tries to report only the first +error that occurred for a file system since the last notification, and +it simply counts additional errors. This ensures that the most +important pieces of information are never lost. + +FAN_FS_ERROR requires the fanotify group to be setup with the +FAN_REPORT_FID flag. + +At the time of this writing, the only file system that emits FAN_FS_ERROR +notifications is Ext4. + +A FAN_FS_ERROR Notification has the following format:: + + [ Notification Metadata (Mandatory) ] + [ Generic Error Record (Mandatory) ] + [ FID record (Mandatory) ] + +The order of records is not guaranteed, and new records might be added +in the future. Therefore, applications must not rely on the order and +must be prepared to skip over unknown records. Please refer to +``samples/fanotify/fs-monitor.c`` for an example parser. + +Generic error record +-------------------- + +The generic error record provides enough information for a file system +agnostic tool to learn about a problem in the file system, without +providing any additional details about the problem. This record is +identified by ``struct fanotify_event_info_header.info_type`` being set +to FAN_EVENT_INFO_TYPE_ERROR. 
+ + struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; + }; + +The `error` field identifies the type of error using errno values. +`error_count` tracks the number of errors that occurred and were +suppressed to preserve the original error information, since the last +notification. + +FID record +---------- + +The FID record can be used to uniquely identify the inode that triggered +the error through the combination of fsid and file handle. A file system +specific application can use that information to attempt a recovery +procedure. Errors that are not related to an inode are reported with an +empty file handle of type FILEID_INVALID. diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst index 0febe45859..b9ab02325e 100644 --- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst +++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst @@ -66,8 +66,8 @@ arg4: will be performed for all tasks in the task group of ``pid``. arg5: - userspace pointer to an unsigned long for storing the cookie returned by - ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. + userspace pointer to an unsigned long long for storing the cookie returned + by ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. In order for a process to push a cookie to, or pull a cookie from a process, it is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 6828102baa..3e4a14e38b 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -21,3 +21,4 @@ are configurable at compile, boot or run time. 
cross-thread-rsb.rst gather_data_sampling.rst srso + reg-file-data-sampling diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst new file mode 100644 index 0000000000..810424b4b7 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -0,0 +1,104 @@ +================================== +Register File Data Sampling (RFDS) +================================== + +Register File Data Sampling (RFDS) is a microarchitectural vulnerability that +only affects Intel Atom parts (also branded as E-cores). RFDS may allow +a malicious actor to infer data values previously used in floating point +registers, vector registers, or integer registers. RFDS does not provide the +ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS. + +Affected Processors +=================== +Below is the list of affected Intel processors [#f1]_: + + =================== ============ + Common name Family_Model + =================== ============ + ATOM_GOLDMONT 06_5CH + ATOM_GOLDMONT_D 06_5FH + ATOM_GOLDMONT_PLUS 06_7AH + ATOM_TREMONT_D 06_86H + ATOM_TREMONT 06_96H + ALDERLAKE 06_97H + ALDERLAKE_L 06_9AH + ATOM_TREMONT_L 06_9CH + RAPTORLAKE 06_B7H + RAPTORLAKE_P 06_BAH + ALDERLAKE_N 06_BEH + RAPTORLAKE_S 06_BFH + =================== ============ + +As an exception to this table, Intel Xeon E family parts ALDERLAKE (06_97H) and +RAPTORLAKE (06_B7H) codenamed Catlow are not affected. They are reported as +vulnerable in Linux because they share the same family/model with an affected +part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or +CPUID.HYBRID. This information could be used to distinguish between the +affected and unaffected parts, but it is deemed not worth adding complexity as +the reporting is fixed automatically when these parts enumerate RFDS_NO. 
+ +Mitigation +========== +Intel released a microcode update that enables software to clear sensitive +information using the VERW instruction. Like MDS, RFDS deploys the same +mitigation strategy to force the CPU to clear the affected buffers before an +attacker can extract the secrets. This is achieved by using the otherwise +unused and obsolete VERW instruction in combination with a microcode update. +The microcode clears the affected CPU buffers when the VERW instruction is +executed. + +Mitigation points +----------------- +VERW is executed by the kernel before returning to user space, and by KVM +before VMentry. None of the affected cores support SMT, so VERW is not required +at C-state transitions. + +New bits in IA32_ARCH_CAPABILITIES +---------------------------------- +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +vulnerability and mitigation capability: + +- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS. +- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the + microcode that clears the affected buffers on VERW execution. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control RFDS mitigation at boot time with the +parameter "reg_file_data_sampling=". The valid arguments are: + + ========== ================================================================= + on If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and before entering a VM. + off Disables mitigation. + ========== ================================================================= + +Mitigation default is selected by CONFIG_MITIGATION_RFDS. 
+ +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: No microcode' + - The processor is vulnerable but microcode is not updated. + * - 'Mitigation: Clear Register File' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 0fba3758d0..b038410ecc 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically, the BHB might be shared across privilege levels even in the presence of Enhanced IBRS. -Currently the only known real-world BHB attack vector is via -unprivileged eBPF. Therefore, it's highly recommended to not enable -unprivileged eBPF, especially when eIBRS is used (without retpolines). -For a full mitigation against BHB attacks, it's recommended to use -retpolines (or eIBRS combined with retpolines). +Previously the only known real-world BHB attack vector was via unprivileged +eBPF. Further research has found attacks that don't require unprivileged eBPF. +For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or +use the BHB clearing sequence. 
Attack scenarios ---------------- @@ -430,6 +429,23 @@ The possible values in this file are: 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB =========================== ======================================================= + - Branch History Injection (BHI) protection status: + +.. list-table:: + + * - BHI: Not affected + - System is not affected + * - BHI: Retpoline + - System is protected by retpoline + * - BHI: BHI_DIS_S + - System is protected by BHI_DIS_S + * - BHI: SW loop, KVM SW loop + - System is protected by software clearing sequence + * - BHI: Vulnerable + - System is vulnerable to BHI + * - BHI: Vulnerable, KVM: SW loop + - System is vulnerable; KVM is protected by software clearing sequence + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. @@ -484,11 +500,18 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks, including cross-thread branch target injections - on SMT systems (STIBP). In other words, eIBRS enables STIBP too. + some Spectre v2 variant attacks. The BHB can still influence the choice of + indirect branch predictor entry, and although branch predictor entries are + isolated between modes when eIBRS is enabled, the BHB itself is not isolated + between modes. Systems which support BHI_DIS_S will set it to protect against + BHI attacks. - Legacy IBRS systems clear the IBRS bit on exit to userspace and - therefore explicitly enable STIBP for that + On Intel's enhanced IBRS systems, this includes cross-thread branch target + injections on SMT systems (STIBP). In other words, Intel eIBRS enables + STIBP, too. + + AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear + the IBRS bit on exit to userspace, therefore both explicitly enable STIBP. 
The retpoline mitigation is turned on by default on vulnerable CPUs. It can be forced on or off by the administrator @@ -622,9 +645,10 @@ kernel command line. retpoline,generic Retpolines retpoline,lfence LFENCE; indirect branch retpoline,amd alias for retpoline,lfence - eibrs enhanced IBRS - eibrs,retpoline enhanced IBRS + Retpolines - eibrs,lfence enhanced IBRS + LFENCE + eibrs Enhanced/Auto IBRS + eibrs,retpoline Enhanced/Auto IBRS + Retpolines + eibrs,lfence Enhanced/Auto IBRS + LFENCE + ibrs use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. @@ -684,6 +708,20 @@ For user space mitigation: spectre_v2=off. Spectre variant 1 mitigations cannot be disabled. + spectre_bhi= + + [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. This setting affects the deployment + of the HW BHI control and the SW BHB clearing sequence. + + on + (default) Enable the HW or SW mitigation as + needed. + off + Disable the mitigation. + +For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt + Mitigation selection guide -------------------------- diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index dc00afcabb..1bedab4981 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -82,6 +82,7 @@ configure specific aspects of kernel behavior to your liking. edid efi-stub ext4 + filesystem-monitoring nfs/index gpio/index highuid diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c4c7273419..e61f0d038c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1037,6 +1037,26 @@ The filter can be disabled or changed to another driver later using sysfs. + reg_file_data_sampling= + [X86] Controls mitigation for Register File Data + Sampling (RFDS) vulnerability. 
RFDS is a CPU + vulnerability which may allow userspace to infer + kernel data values previously stored in floating point + registers, vector registers, or integer registers. + RFDS only affects Intel Atom processors. + + on: Turns ON the mitigation. + off: Turns OFF the mitigation. + + This parameter overrides the compile time default set + by CONFIG_MITIGATION_RFDS. Mitigation cannot be + disabled when other VERW based mitigations (like MDS) + are enabled. In order to disable RFDS mitigation all + VERW based mitigations need to be disabled. + + For details see: + Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst + driver_async_probe= [KNL] List of driver names to be probed asynchronously. Format: <driver_name1>,<driver_name2>... @@ -3070,8 +3090,10 @@ nopti [X86,PPC] nospectre_v1 [X86,PPC] nospectre_v2 [X86,PPC,S390,ARM64] + reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] + spectre_bhi=off [X86] spectre_v2_user=off [X86] ssbd=force-off [ARM64] tsx_async_abort=off [X86] @@ -5383,6 +5405,15 @@ sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/admin-guide/laptops/sonypi.rst + spectre_bhi= [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. This setting affects the + deployment of the HW BHI control and the SW BHB + clearing sequence. + + on - (default) Enable the HW or SW mitigation + as needed. + off - Disable the mitigation. + spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. 
The default operation protects the kernel from @@ -5413,9 +5444,9 @@ retpoline,generic - Retpolines retpoline,lfence - LFENCE; indirect branch retpoline,amd - alias for retpoline,lfence - eibrs - enhanced IBRS - eibrs,retpoline - enhanced IBRS + Retpolines - eibrs,lfence - enhanced IBRS + LFENCE + eibrs - Enhanced/Auto IBRS + eibrs,retpoline - Enhanced/Auto IBRS + Retpolines + eibrs,lfence - Enhanced/Auto IBRS + LFENCE ibrs - use IBRS to protect kernel Not specifying this option is equivalent to diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 6d6d0edd2d..829f20a193 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -204,6 +204,20 @@ Returns the maximum size of a mapping for the device. The size parameter of the mapping functions like dma_map_single(), dma_map_page() and others should not be larger than the returned value. +:: + + size_t + dma_opt_mapping_size(struct device *dev); + +Returns the maximum optimal size of a mapping for the device. + +Mapping larger buffers may take much longer in certain scenarios. In +addition, for high-rate short-lived streaming mappings, the upfront time +spent on the mapping may account for an appreciable part of the total +request lifetime. As such, if splitting larger requests incurs no +significant performance penalty, then device drivers are advised to +limit total DMA streaming mappings length to the returned value. 
+ :: bool diff --git a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml index c13c10c8d6..eed0df9d3a 100644 --- a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml +++ b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml @@ -42,7 +42,7 @@ allOf: properties: compatible: contains: - const: maxim,max30100 + const: maxim,max30102 then: properties: maxim,green-led-current-microamp: false diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov2680.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov2680.yaml index cf456f8d9d..c87677f5e2 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov2680.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov2680.yaml @@ -37,15 +37,15 @@ properties: active low. maxItems: 1 - dovdd-supply: + DOVDD-supply: description: Definition of the regulator used as interface power supply. - avdd-supply: + AVDD-supply: description: Definition of the regulator used as analog power supply. - dvdd-supply: + DVDD-supply: description: Definition of the regulator used as digital power supply. 
@@ -59,9 +59,9 @@ required: - reg - clocks - clock-names - - dovdd-supply - - avdd-supply - - dvdd-supply + - DOVDD-supply + - AVDD-supply + - DVDD-supply - reset-gpios - port @@ -82,9 +82,9 @@ examples: clock-names = "xvclk"; reset-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>; - dovdd-supply = <&sw2_reg>; - dvdd-supply = <&sw2_reg>; - avdd-supply = <&reg_peri_3p15v>; + DOVDD-supply = <&sw2_reg>; + DVDD-supply = <&sw2_reg>; + AVDD-supply = <&reg_peri_3p15v>; port { ov2680_to_mipi: endpoint { diff --git a/Documentation/devicetree/bindings/pci/rcar-pci-host.yaml b/Documentation/devicetree/bindings/pci/rcar-pci-host.yaml index 8fdfbc763d..835b6db00c 100644 --- a/Documentation/devicetree/bindings/pci/rcar-pci-host.yaml +++ b/Documentation/devicetree/bindings/pci/rcar-pci-host.yaml @@ -68,6 +68,18 @@ properties: phy-names: const: pcie + vpcie1v5-supply: + description: The 1.5v regulator to use for PCIe. + + vpcie3v3-supply: + description: The 3.3v regulator to use for PCIe. + + vpcie12v-supply: + description: The 12v regulator to use for PCIe. 
+ + iommu-map: true + iommu-map-mask: true + required: - compatible - reg @@ -121,5 +133,7 @@ examples: clock-names = "pcie", "pcie_bus"; power-domains = <&sysc R8A7791_PD_ALWAYS_ON>; resets = <&cpg 319>; + vpcie3v3-supply = <&pcie_3v3>; + vpcie12v-supply = <&pcie_12v>; }; }; diff --git a/Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml index 0feecd376c..9aed3a58f3 100644 --- a/Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml @@ -94,7 +94,8 @@ patternProperties: then: properties: groups: - enum: [emmc, emmc_rst] + items: + enum: [emmc, emmc_rst] - if: properties: function: @@ -102,8 +103,9 @@ patternProperties: then: properties: groups: - enum: [esw, esw_p0_p1, esw_p2_p3_p4, rgmii_via_esw, - rgmii_via_gmac1, rgmii_via_gmac2, mdc_mdio] + items: + enum: [esw, esw_p0_p1, esw_p2_p3_p4, rgmii_via_esw, + rgmii_via_gmac1, rgmii_via_gmac2, mdc_mdio] - if: properties: function: @@ -120,10 +122,11 @@ patternProperties: then: properties: groups: - enum: [i2s_in_mclk_bclk_ws, i2s1_in_data, i2s2_in_data, - i2s3_in_data, i2s4_in_data, i2s_out_mclk_bclk_ws, - i2s1_out_data, i2s2_out_data, i2s3_out_data, - i2s4_out_data] + items: + enum: [i2s_in_mclk_bclk_ws, i2s1_in_data, i2s2_in_data, + i2s3_in_data, i2s4_in_data, i2s_out_mclk_bclk_ws, + i2s1_out_data, i2s2_out_data, i2s3_out_data, + i2s4_out_data] - if: properties: function: @@ -156,10 +159,11 @@ patternProperties: then: properties: groups: - enum: [pcie0_0_waken, pcie0_1_waken, pcie1_0_waken, - pcie0_0_clkreq, pcie0_1_clkreq, pcie1_0_clkreq, - pcie0_pad_perst, pcie1_pad_perst, pcie_pereset, - pcie_wake, pcie_clkreq] + items: + enum: [pcie0_0_waken, pcie0_1_waken, pcie1_0_waken, + pcie0_0_clkreq, pcie0_1_clkreq, pcie1_0_clkreq, + pcie0_pad_perst, pcie1_pad_perst, pcie_pereset, + pcie_wake, pcie_clkreq] - if: properties: function: @@ 
-175,11 +179,12 @@ patternProperties: then: properties: groups: - enum: [pwm_ch1_0, pwm_ch1_1, pwm_ch1_2, pwm_ch2_0, pwm_ch2_1, - pwm_ch2_2, pwm_ch3_0, pwm_ch3_1, pwm_ch3_2, pwm_ch4_0, - pwm_ch4_1, pwm_ch4_2, pwm_ch4_3, pwm_ch5_0, pwm_ch5_1, - pwm_ch5_2, pwm_ch6_0, pwm_ch6_1, pwm_ch6_2, pwm_ch6_3, - pwm_ch7_0, pwm_0, pwm_1] + items: + enum: [pwm_ch1_0, pwm_ch1_1, pwm_ch1_2, pwm_ch2_0, pwm_ch2_1, + pwm_ch2_2, pwm_ch3_0, pwm_ch3_1, pwm_ch3_2, pwm_ch4_0, + pwm_ch4_1, pwm_ch4_2, pwm_ch4_3, pwm_ch5_0, pwm_ch5_1, + pwm_ch5_2, pwm_ch6_0, pwm_ch6_1, pwm_ch6_2, pwm_ch6_3, + pwm_ch7_0, pwm_0, pwm_1] - if: properties: function: @@ -257,33 +262,34 @@ patternProperties: pins: description: | An array of strings. Each string contains the name of a pin. - enum: [GPIO_A, I2S1_IN, I2S1_OUT, I2S_BCLK, I2S_WS, I2S_MCLK, TXD0, - RXD0, SPI_WP, SPI_HOLD, SPI_CLK, SPI_MOSI, SPI_MISO, SPI_CS, - I2C_SDA, I2C_SCL, I2S2_IN, I2S3_IN, I2S4_IN, I2S2_OUT, - I2S3_OUT, I2S4_OUT, GPIO_B, MDC, MDIO, G2_TXD0, G2_TXD1, - G2_TXD2, G2_TXD3, G2_TXEN, G2_TXC, G2_RXD0, G2_RXD1, G2_RXD2, - G2_RXD3, G2_RXDV, G2_RXC, NCEB, NWEB, NREB, NDL4, NDL5, NDL6, - NDL7, NRB, NCLE, NALE, NDL0, NDL1, NDL2, NDL3, MDI_TP_P0, - MDI_TN_P0, MDI_RP_P0, MDI_RN_P0, MDI_TP_P1, MDI_TN_P1, - MDI_RP_P1, MDI_RN_P1, MDI_RP_P2, MDI_RN_P2, MDI_TP_P2, - MDI_TN_P2, MDI_TP_P3, MDI_TN_P3, MDI_RP_P3, MDI_RN_P3, - MDI_RP_P4, MDI_RN_P4, MDI_TP_P4, MDI_TN_P4, PMIC_SCL, - PMIC_SDA, SPIC1_CLK, SPIC1_MOSI, SPIC1_MISO, SPIC1_CS, - GPIO_D, WATCHDOG, RTS3_N, CTS3_N, TXD3, RXD3, PERST0_N, - PERST1_N, WLED_N, EPHY_LED0_N, AUXIN0, AUXIN1, AUXIN2, - AUXIN3, TXD4, RXD4, RTS4_N, CST4_N, PWM1, PWM2, PWM3, PWM4, - PWM5, PWM6, PWM7, GPIO_E, TOP_5G_CLK, TOP_5G_DATA, - WF0_5G_HB0, WF0_5G_HB1, WF0_5G_HB2, WF0_5G_HB3, WF0_5G_HB4, - WF0_5G_HB5, WF0_5G_HB6, XO_REQ, TOP_RST_N, SYS_WATCHDOG, - EPHY_LED0_N_JTDO, EPHY_LED1_N_JTDI, EPHY_LED2_N_JTMS, - EPHY_LED3_N_JTCLK, EPHY_LED4_N_JTRST_N, WF2G_LED_N, - WF5G_LED_N, GPIO_9, GPIO_10, GPIO_11, GPIO_12, UART1_TXD, - 
UART1_RXD, UART1_CTS, UART1_RTS, UART2_TXD, UART2_RXD, - UART2_CTS, UART2_RTS, SMI_MDC, SMI_MDIO, PCIE_PERESET_N, - PWM_0, GPIO_0, GPIO_1, GPIO_2, GPIO_3, GPIO_4, GPIO_5, - GPIO_6, GPIO_7, GPIO_8, UART0_TXD, UART0_RXD, TOP_2G_CLK, - TOP_2G_DATA, WF0_2G_HB0, WF0_2G_HB1, WF0_2G_HB2, WF0_2G_HB3, - WF0_2G_HB4, WF0_2G_HB5, WF0_2G_HB6] + items: + enum: [GPIO_A, I2S1_IN, I2S1_OUT, I2S_BCLK, I2S_WS, I2S_MCLK, TXD0, + RXD0, SPI_WP, SPI_HOLD, SPI_CLK, SPI_MOSI, SPI_MISO, SPI_CS, + I2C_SDA, I2C_SCL, I2S2_IN, I2S3_IN, I2S4_IN, I2S2_OUT, + I2S3_OUT, I2S4_OUT, GPIO_B, MDC, MDIO, G2_TXD0, G2_TXD1, + G2_TXD2, G2_TXD3, G2_TXEN, G2_TXC, G2_RXD0, G2_RXD1, G2_RXD2, + G2_RXD3, G2_RXDV, G2_RXC, NCEB, NWEB, NREB, NDL4, NDL5, NDL6, + NDL7, NRB, NCLE, NALE, NDL0, NDL1, NDL2, NDL3, MDI_TP_P0, + MDI_TN_P0, MDI_RP_P0, MDI_RN_P0, MDI_TP_P1, MDI_TN_P1, + MDI_RP_P1, MDI_RN_P1, MDI_RP_P2, MDI_RN_P2, MDI_TP_P2, + MDI_TN_P2, MDI_TP_P3, MDI_TN_P3, MDI_RP_P3, MDI_RN_P3, + MDI_RP_P4, MDI_RN_P4, MDI_TP_P4, MDI_TN_P4, PMIC_SCL, + PMIC_SDA, SPIC1_CLK, SPIC1_MOSI, SPIC1_MISO, SPIC1_CS, + GPIO_D, WATCHDOG, RTS3_N, CTS3_N, TXD3, RXD3, PERST0_N, + PERST1_N, WLED_N, EPHY_LED0_N, AUXIN0, AUXIN1, AUXIN2, + AUXIN3, TXD4, RXD4, RTS4_N, CST4_N, PWM1, PWM2, PWM3, PWM4, + PWM5, PWM6, PWM7, GPIO_E, TOP_5G_CLK, TOP_5G_DATA, + WF0_5G_HB0, WF0_5G_HB1, WF0_5G_HB2, WF0_5G_HB3, WF0_5G_HB4, + WF0_5G_HB5, WF0_5G_HB6, XO_REQ, TOP_RST_N, SYS_WATCHDOG, + EPHY_LED0_N_JTDO, EPHY_LED1_N_JTDI, EPHY_LED2_N_JTMS, + EPHY_LED3_N_JTCLK, EPHY_LED4_N_JTRST_N, WF2G_LED_N, + WF5G_LED_N, GPIO_9, GPIO_10, GPIO_11, GPIO_12, UART1_TXD, + UART1_RXD, UART1_CTS, UART1_RTS, UART2_TXD, UART2_RXD, + UART2_CTS, UART2_RTS, SMI_MDC, SMI_MDIO, PCIE_PERESET_N, + PWM_0, GPIO_0, GPIO_1, GPIO_2, GPIO_3, GPIO_4, GPIO_5, + GPIO_6, GPIO_7, GPIO_8, UART0_TXD, UART0_RXD, TOP_2G_CLK, + TOP_2G_DATA, WF0_2G_HB0, WF0_2G_HB1, WF0_2G_HB2, WF0_2G_HB3, + WF0_2G_HB4, WF0_2G_HB5, WF0_2G_HB6] bias-disable: true diff --git 
a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml index dfebf425ca..8fe31a7083 100644 --- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml @@ -141,6 +141,7 @@ allOf: unevaluatedProperties: false pcie-phy: + type: object description: Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt diff --git a/Documentation/devicetree/bindings/sound/rt5645.txt b/Documentation/devicetree/bindings/sound/rt5645.txt index 41a62fd2ae..c1fa379f5f 100644 --- a/Documentation/devicetree/bindings/sound/rt5645.txt +++ b/Documentation/devicetree/bindings/sound/rt5645.txt @@ -20,6 +20,11 @@ Optional properties: a GPIO spec for the external headphone detect pin. If jd-mode = 0, we will get the JD status by getting the value of hp-detect-gpios. +- cbj-sleeve-gpios: + a GPIO spec to control the external combo jack circuit to tie the sleeve/ring2 + contacts to the ground or floating. It could avoid some electric noise from the + active speaker jacks. + - realtek,in2-differential Boolean. Indicate MIC2 input are differential, rather than single-ended. 
@@ -68,6 +73,7 @@ codec: rt5650@1a { compatible = "realtek,rt5650"; reg = <0x1a>; hp-detect-gpios = <&gpio 19 0>; + cbj-sleeve-gpios = <&gpio 20 0>; interrupt-parent = <&gpio>; interrupts = <7 IRQ_TYPE_EDGE_FALLING>; realtek,dmic-en = "true"; diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst index 2636a27c11..2d03b5fb76 100644 --- a/Documentation/driver-api/fpga/fpga-region.rst +++ b/Documentation/driver-api/fpga/fpga-region.rst @@ -46,10 +46,16 @@ API to add a new FPGA region ---------------------------- * struct fpga_region - The FPGA region struct -* devm_fpga_region_create() - Allocate and init a region struct -* fpga_region_register() - Register an FPGA region +* struct fpga_region_info - Parameter structure for __fpga_region_register_full() +* __fpga_region_register_full() - Create and register an FPGA region using the + fpga_region_info structure to provide the full flexibility of options +* __fpga_region_register() - Create and register an FPGA region using standard + arguments * fpga_region_unregister() - Unregister an FPGA region +Helper macros ``fpga_region_register()`` and ``fpga_region_register_full()`` +automatically set the module that registers the FPGA region as the owner. + The FPGA region's probe function will need to get a reference to the FPGA Manager it will be using to do the programming. This usually would happen during the region's probe function. @@ -75,11 +81,14 @@ following APIs to handle building or tearing down that list. .. kernel-doc:: include/linux/fpga/fpga-region.h :functions: fpga_region +.. kernel-doc:: include/linux/fpga/fpga-region.h + :functions: fpga_region_info + .. kernel-doc:: drivers/fpga/fpga-region.c - :functions: devm_fpga_region_create + :functions: __fpga_region_register_full .. kernel-doc:: drivers/fpga/fpga-region.c - :functions: fpga_region_register + :functions: __fpga_region_register .. 
kernel-doc:: drivers/fpga/fpga-region.c :functions: fpga_region_unregister diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 5833cea4a1..dabf4f7c75 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -442,17 +442,21 @@ prototypes:: void (*lm_break)(struct file_lock *); /* break_lease callback */ int (*lm_change)(struct file_lock **, int); bool (*lm_breaker_owns_lease)(struct file_lock *); + bool (*lm_lock_expirable)(struct file_lock *); + void (*lm_expire_lock)(void); locking rules: ====================== ============= ================= ========= -ops inode->i_lock blocked_lock_lock may block +ops flc_lock blocked_lock_lock may block ====================== ============= ================= ========= -lm_notify: yes yes no +lm_notify: no yes no lm_grant: no no no lm_break: yes no no lm_change yes no no -lm_breaker_owns_lease: no no no +lm_breaker_owns_lease: yes no no +lm_lock_expirable yes no no +lm_expire_lock no no yes ====================== ============= ================= ========= buffer_head diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 0e98edd353..6f59a364f8 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -215,3 +215,29 @@ following flags are defined: This flag causes nfsd to close any open files for this inode _before_ calling into the vfs to do an unlink or a rename that would replace an existing file. + + EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote + PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to + write to one bdi (the final bdi) in order to free up writes queued + to another bdi (the client bdi). Such threads get a private balance + of dirty pages so that dirty pages for the client bdi do not impact + the daemon writing to the final bdi.
For filesystems whose durable + storage is not local (such as exported NFS filesystems), this + constraint has negative consequences. EXPORT_OP_REMOTE_FS enables + an export to disable writeback throttling. + + EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically + EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem + cannot provide the semantics required by the "atomic" boolean in + NFSv4's change_info4. This boolean indicates to a client whether the + returned before and after change attributes were obtained atomically + with the respect to the requested metadata operation (UNLINK, + OPEN/CREATE, MKDIR, etc). + + EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2) + On most filesystems, inodes can remain under writeback after the + file is closed. NFSD relies on client activity or local flusher + threads to handle writeback. Certain filesystems, such as NFS, flush + all of an inode's dirty data on last close. Exports that behave this + way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip + waiting for writeback when closing such files. diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst index 01b2a69b0c..a8a2aa2ae8 100644 --- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst +++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst @@ -205,6 +205,7 @@ Adaptive coalescing can be switched on/off through `ethtool(8)`'s More information about Adaptive Interrupt Moderation (DIM) can be found in Documentation/networking/net_dim.rst +.. _`RX copybreak`: RX copybreak ============ The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK @@ -315,3 +316,34 @@ Rx - The new SKB is updated with the necessary information (protocol, checksum hw verify result, etc), and then passed to the network stack, using the NAPI interface function :code:`napi_gro_receive()`. 
+ +Dynamic RX Buffers (DRB) +------------------------ + +Each RX descriptor in the RX ring is a single memory page (which is either 4KB +or 16KB long depending on system's configurations). +To reduce the memory allocations required when dealing with a high rate of small +packets, the driver tries to reuse the remaining RX descriptor's space if more +than 2KB of this page remain unused. + +A simple example of this mechanism is the following sequence of events: + +:: + + 1. Driver allocates page-sized RX buffer and passes it to hardware + +----------------------+ + |4KB RX Buffer | + +----------------------+ + + 2. A 300Bytes packet is received on this buffer + + 3. The driver increases the ref count on this page and returns it back to + HW as an RX buffer of size 4KB - 300Bytes = 3796 Bytes + +----+--------------------+ + |****|3796 Bytes RX Buffer| + +----+--------------------+ + +This mechanism isn't used when an XDP program is loaded, or when the +RX packet is less than rx_copybreak bytes (in which case the packet is +copied out of the RX buffer into the linear part of a new skb allocated +for it and the RX buffer remains the same size, see `RX copybreak`_). 
diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index f523aa68a3..cf601bd058 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -94,7 +94,6 @@ def _run(self): # HINT: this is the only line I had to change / commented out: #path = utils.relative_path(None, path) - path = nodes.reprunicode(path) encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) e_handler=self.state.document.settings.input_encoding_error_handler diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 5d4330be20..e801df0bb3 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than EFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. + + The mitigation is invoked just before transitioning to userspace after + user registers are restored.
This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to kernel don't clear CPU buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMIs returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. What's left + is only the data that NMI touches, and that may or may not be of + any interest. 2.
C-State transition diff --git a/MAINTAINERS b/MAINTAINERS index 9216b9c85c..6bfc75861c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9200,6 +9200,7 @@ F: drivers/infiniband/ F: include/rdma/ F: include/trace/events/ib_mad.h F: include/trace/events/ib_umad.h +F: include/trace/misc/rdma.h F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: samples/bpf/ibumad_kern.c @@ -10181,6 +10182,12 @@ F: fs/nfs_common/ F: fs/nfsd/ F: include/linux/lockd/ F: include/linux/sunrpc/ +F: include/trace/events/rpcgss.h +F: include/trace/events/rpcrdma.h +F: include/trace/events/sunrpc.h +F: include/trace/misc/fs.h +F: include/trace/misc/nfs.h +F: include/trace/misc/sunrpc.h F: include/uapi/linux/nfsd/ F: include/uapi/linux/sunrpc/ F: net/sunrpc/ diff --git a/Makefile b/Makefile index 08ba128253..c12163de91 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 153 +SUBLEVEL = 161 EXTRAVERSION = NAME = Trick or Treat @@ -1002,8 +1002,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI) export CC_FLAGS_CFI endif -ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B -KBUILD_CFLAGS += -falign-functions=64 +ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) +KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT) endif # arch Makefile may override CC so keep this after arch Makefile is included diff --git a/arch/Kconfig b/arch/Kconfig index 2e2dc0975a..69e3743206 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,14 @@ # source "arch/$(SRCARCH)/Kconfig" +config ARCH_CONFIGURES_CPU_MITIGATIONS + bool + +if !ARCH_CONFIGURES_CPU_MITIGATIONS +config CPU_MITIGATIONS + def_bool y +endif + menu "General architecture-dependent options" config CRASH_CORE @@ -1303,4 +1311,28 @@ source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" +config FUNCTION_ALIGNMENT_4B + bool + +config FUNCTION_ALIGNMENT_8B + bool + +config FUNCTION_ALIGNMENT_16B + bool + +config FUNCTION_ALIGNMENT_32B + bool + +config FUNCTION_ALIGNMENT_64B + bool + 
+config FUNCTION_ALIGNMENT + int + default 64 if FUNCTION_ALIGNMENT_64B + default 32 if FUNCTION_ALIGNMENT_32B + default 16 if FUNCTION_ALIGNMENT_16B + default 8 if FUNCTION_ALIGNMENT_8B + default 4 if FUNCTION_ALIGNMENT_4B + default 0 + endmenu diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index dcaa44e408..27f4194b37 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -205,7 +205,6 @@ }; gmac: ethernet@8000 { - #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = <0x8000 0x2000>; interrupts = <10>; diff --git a/arch/arm/boot/dts/mmp2-brownstone.dts b/arch/arm/boot/dts/mmp2-brownstone.dts index 04f1ae1382..bc64348b82 100644 --- a/arch/arm/boot/dts/mmp2-brownstone.dts +++ b/arch/arm/boot/dts/mmp2-brownstone.dts @@ -28,7 +28,7 @@ &twsi1 { status = "okay"; pmic: max8925@3c { - compatible = "maxium,max8925"; + compatible = "maxim,max8925"; reg = <0x3c>; interrupts = <1>; interrupt-parent = <&intcmux4>; diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 43077e11da..2acf880fcc 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -114,6 +114,10 @@ ENDPROC(cpu_resume_mmu) .popsection cpu_resume_after_mmu: bl cpu_init @ restore the und/abt/irq banked regs +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) + mov r0, sp + bl kasan_unpoison_task_stack_below +#endif mov r0, #0 @ return zero on success ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_resume_after_mmu) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 639220dbff..685e9b83d4 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -38,8 +38,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>; + <&sdhc0_lpcg IMX_LPCG_CLK_5>, + <&sdhc0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", 
"per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; @@ -49,8 +49,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>; + <&sdhc1_lpcg IMX_LPCG_CLK_5>, + <&sdhc1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; @@ -62,8 +62,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>; + <&sdhc2_lpcg IMX_LPCG_CLK_5>, + <&sdhc2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; diff --git a/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi b/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi index a83b9d4f17..add54f4e7b 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi @@ -58,7 +58,7 @@ gic: interrupt-controller@f1001000 { compatible = "arm,gic-400"; reg = <0x0 0xf1001000 0x0 0x1000>, /* GICD */ - <0x0 0xf1002000 0x0 0x100>; /* GICC */ + <0x0 0xf1002000 0x0 0x2000>; /* GICC */ #address-cells = <0>; #interrupt-cells = <3>; interrupt-controller; diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts index 9d20cabf4f..99515c13da 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts @@ -127,7 +127,7 @@ }; &pio { - eth_default: eth_default { + eth_default: eth-default-pins { tx_pins { pinmux = , , @@ -154,7 +154,7 @@ }; }; - eth_sleep: eth_sleep { + eth_sleep: eth-sleep-pins { tx_pins { pinmux = , , @@ -180,14 +180,14 @@ }; }; - usb0_id_pins_float: usb0_iddig { + usb0_id_pins_float: usb0-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; }; }; - usb1_id_pins_float: usb1_iddig { + usb1_id_pins_float: usb1-iddig-pins { pins_iddig 
{ pinmux = ; bias-pull-up; diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index 993a03d7ff..57e9c39fab 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -249,10 +249,11 @@ #clock-cells = <1>; }; - infracfg: syscon@10001000 { + infracfg: clock-controller@10001000 { compatible = "mediatek,mt2712-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; pericfg: syscon@10003000 { diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index a4c48b2abd..5f21f1e95d 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -244,7 +244,7 @@ clock-names = "hif_sel"; }; - cir: cir@10009000 { + cir: ir-receiver@10009000 { compatible = "mediatek,mt7622-cir"; reg = <0 0x10009000 0 0x1000>; interrupts = ; @@ -275,16 +275,14 @@ }; }; - apmixedsys: apmixedsys@10209000 { - compatible = "mediatek,mt7622-apmixedsys", - "syscon"; + apmixedsys: clock-controller@10209000 { + compatible = "mediatek,mt7622-apmixedsys"; reg = <0 0x10209000 0 0x1000>; #clock-cells = <1>; }; - topckgen: topckgen@10210000 { - compatible = "mediatek,mt7622-topckgen", - "syscon"; + topckgen: clock-controller@10210000 { + compatible = "mediatek,mt7622-topckgen"; reg = <0 0x10210000 0 0x1000>; #clock-cells = <1>; }; @@ -357,7 +355,7 @@ }; cci_control2: slave-if@5000 { - compatible = "arm,cci-400-ctrl-if"; + compatible = "arm,cci-400-ctrl-if", "syscon"; interface-type = "ace"; reg = <0x5000 0x1000>; }; @@ -507,7 +505,6 @@ <&pericfg CLK_PERI_AUXADC_PD>; clock-names = "therm", "auxadc"; resets = <&pericfg MT7622_PERI_THERM_SW_RST>; - reset-names = "therm"; mediatek,auxadc = <&auxadc>; mediatek,apmixedsys = <&apmixedsys>; nvmem-cells = <&thermal_calibration>; @@ -715,9 +712,8 @@ power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>; }; - ssusbsys: ssusbsys@1a000000 { - compatible =
"mediatek,mt7622-ssusbsys", - "syscon"; + ssusbsys: clock-controller@1a000000 { + compatible = "mediatek,mt7622-ssusbsys"; reg = <0 0x1a000000 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -774,9 +770,8 @@ }; }; - pciesys: pciesys@1a100800 { - compatible = "mediatek,mt7622-pciesys", - "syscon"; + pciesys: clock-controller@1a100800 { + compatible = "mediatek,mt7622-pciesys"; reg = <0 0x1a100800 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -894,7 +889,13 @@ }; }; - ethsys: syscon@1b000000 { + hifsys: clock-controller@1af00000 { + compatible = "mediatek,mt7622-hifsys"; + reg = <0 0x1af00000 0 0x70>; + #clock-cells = <1>; + }; + + ethsys: clock-controller@1b000000 { compatible = "mediatek,mt7622-ethsys", "syscon"; reg = <0 0x1b000000 0 0x1000>; @@ -912,10 +913,28 @@ #dma-cells = <1>; }; - eth: ethernet@1b100000 { - compatible = "mediatek,mt7622-eth", - "mediatek,mt2701-eth", + pcie_mirror: pcie-mirror@10000400 { + compatible = "mediatek,mt7622-pcie-mirror", "syscon"; + reg = <0 0x10000400 0 0x10>; + }; + + wed0: wed@1020a000 { + compatible = "mediatek,mt7622-wed", + "syscon"; + reg = <0 0x1020a000 0 0x1000>; + interrupts = ; + }; + + wed1: wed@1020b000 { + compatible = "mediatek,mt7622-wed", + "syscon"; + reg = <0 0x1020b000 0 0x1000>; + interrupts = ; + }; + + eth: ethernet@1b100000 { + compatible = "mediatek,mt7622-eth"; reg = <0 0x1b100000 0 0x20000>; interrupts = , , @@ -938,6 +957,11 @@ power-domains = <&scpsys MT7622_POWER_DOMAIN_ETHSYS>; mediatek,ethsys = <&ethsys>; mediatek,sgmiisys = <&sgmiisys>; + mediatek,cci-control = <&cci_control2>; + mediatek,wed = <&wed0>, <&wed1>; + mediatek,pcie-mirror = <&pcie_mirror>; + mediatek,hifsys = <&hifsys>; + dma-coherent; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 2d33f4a583..712ac1826d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@
-1088,6 +1088,7 @@ compatible = "mediatek,mt8183-mfgcfg", "syscon"; reg = <0 0x13000000 0 0x1000>; #clock-cells = <1>; + power-domains = <&spm MT8183_POWER_DOMAIN_MFG_ASYNC>; }; gpu: gpu@13040000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts index 6e5f846566..a5ff8cfedf 100644 --- a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts +++ b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts @@ -9,8 +9,8 @@ compatible = "nvidia,norrin", "nvidia,tegra132", "nvidia,tegra124"; aliases { - rtc0 = "/i2c@7000d000/as3722@40"; - rtc1 = "/rtc@7000e000"; + rtc0 = &as3722; + rtc1 = &tegra_rtc; serial0 = &uarta; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi index b0bcda8cc5..5bfd497f63 100644 --- a/arch/arm64/boot/dts/nvidia/tegra132.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi @@ -573,7 +573,7 @@ status = "disabled"; }; - rtc@7000e000 { + tegra_rtc: rtc@7000e000 { compatible = "nvidia,tegra124-rtc", "nvidia,tegra20-rtc"; reg = <0x0 0x7000e000 0x0 0x100>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index 7eadecba01..d636718adb 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -958,10 +958,10 @@ interrupts = ; interrupt-names = "msi"; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 135 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc 0 136 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc 0 138 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc 0 139 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &intc 0 0 135 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &intc 0 0 136 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &intc 0 0 138 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &intc 0 0 139 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_PCIE_0_PIPE_CLK>, <&gcc GCC_PCIE_0_MSTR_AXI_CLK>, diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index a80c578484..b6d70d0073 
100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -60,7 +60,7 @@ vddrf-supply = <&vreg_l1_1p3>; vddch0-supply = <&vdd_ch0_3p3>; - local-bd-address = [ 02 00 00 00 5a ad ]; + local-bd-address = [ 00 00 00 00 00 00 ]; max-speed = <3200000>; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index a9d36ac6cb..a88798b809 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -911,6 +911,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 15af6c7ad0..6f7061c878 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -1996,10 +1996,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 150 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 151 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 152 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ + interrupt-map = <0 0 0 1 &intc 0 0 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ + <0 0 0 2 &intc 0 0 0 150 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ + <0 0 0 3 &intc 0 0 0 151 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ + <0 0 0 4 &intc 0 0 0 152 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_PCIE_0_PIPE_CLK>, <&gcc GCC_PCIE_0_AUX_CLK>, @@ -2101,10 +2101,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 435 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 438 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 439 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ + interrupt-map = <0 0 0 1 &intc 0 0 0 434 
IRQ_TYPE_LEVEL_HIGH>, /* int_a */ + <0 0 0 2 &intc 0 0 0 435 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ + <0 0 0 3 &intc 0 0 0 438 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ + <0 0 0 4 &intc 0 0 0 439 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_PCIE_1_PIPE_CLK>, <&gcc GCC_PCIE_1_AUX_CLK>, diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 3cbe83e6fb..26f02cc70d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -728,11 +728,20 @@ status = "disabled"; ports { - hdmi_in: port { + #address-cells = <1>; + #size-cells = <0>; + + hdmi_in: port@0 { + reg = <0>; + hdmi_in_vop: endpoint { remote-endpoint = <&vop_out_hdmi>; }; }; + + hdmi_out: port@1 { + reg = <1>; + }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 9e5d07f571..e5d057ef81 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -790,7 +790,6 @@ }; &pcie0 { - bus-scan-delay-ms = <1000>; ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 7b27079fd6..b34081d39d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -390,16 +390,22 @@ gpio1830-supply = <&vcc_1v8>; }; -&pmu_io_domains { - status = "okay"; - pmu1830-supply = <&vcc_1v8>; -}; - -&pwm2 { - status = "okay"; +&pcie_clkreqn_cpm { + rockchip,pins = + <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>; }; &pinctrl { + pinctrl-names = "default"; + pinctrl-0 = <&q7_thermal_pin>; + + gpios { + q7_thermal_pin: q7-thermal-pin { + rockchip,pins = + <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>; + }; + }; + i2c8 { i2c8_xfer_a: i2c8-xfer { rockchip,pins = @@ -432,11 +438,20 @@ usb3 { usb3_id: usb3-id { rockchip,pins = - <1 RK_PC2 
RK_FUNC_GPIO &pcfg_pull_none>; + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; +&pmu_io_domains { + status = "okay"; + pmu1830-supply = <&vcc_1v8>; +}; + +&pwm2 { + status = "okay"; +}; + &sdhci { /* * Signal integrity isn't great at 200MHz but 100MHz has proven stable diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 9e33f0e6ed..e98966899f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1801,6 +1801,7 @@ hdmi: hdmi@ff940000 { compatible = "rockchip,rk3399-dw-hdmi"; reg = <0x0 0xff940000 0x0 0x20000>; + reg-io-width = <4>; interrupts = ; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, @@ -1809,13 +1810,16 @@ <&cru PLL_VPLL>; clock-names = "iahb", "isfr", "cec", "grf", "vpll"; power-domains = <&power RK3399_PD_HDCP>; - reg-io-width = <4>; rockchip,grf = <&grf>; #sound-dai-cells = <0>; status = "disabled"; ports { - hdmi_in: port { + #address-cells = <1>; + #size-cells = <0>; + + hdmi_in: port@0 { + reg = <0>; #address-cells = <1>; #size-cells = <0>; @@ -1828,6 +1832,10 @@ remote-endpoint = <&vopl_out_hdmi>; }; }; + + hdmi_out: port@1 { + reg = <1>; + }; }; }; diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h index 03f52f84a4..bc2dcc8a00 100644 --- a/arch/arm64/include/asm/asm-bug.h +++ b/arch/arm64/include/asm/asm-bug.h @@ -28,6 +28,7 @@ 14470: .long 14471f - 14470b; \ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ .short flags; \ + .align 2; \ .popsection; \ 14471: #else diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 0277838295..87e782eec9 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -13,6 +13,18 @@ #define KVM_PGTABLE_MAX_LEVELS 4U +/* + * The largest supported block sizes for KVM (no 52-bit PA support): + * - 4K (level 1): 1GB + * - 16K (level 2): 32MB + * - 64K (level 2): 512MB + */ +#ifdef CONFIG_ARM64_4K_PAGES +#define 
KVM_PGTABLE_MIN_BLOCK_LEVEL 1U +#else +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U +#endif + static inline u64 kvm_get_parange(u64 mmfr0) { u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, @@ -58,11 +70,7 @@ static inline u64 kvm_granule_size(u32 level) static inline bool kvm_level_supports_block_mapping(u32 level) { - /* - * Reject invalid block mappings and don't bother with 4TB mappings for - * 52-bit PAs. - */ - return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); + return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL; } /** diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index fe341a6578..c8dca8ae35 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -10,13 +10,6 @@ #include -/* - * PGDIR_SHIFT determines the size a top-level page table entry can map - * and depends on the number of levels in the page table. Compute the - * PGDIR_SHIFT for a given number of levels. - */ -#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls)) - /* * The hardware supports concatenation of up to 16 tables at stage2 entry * level and we use the feature whenever possible, which means we resolve 4 @@ -30,11 +23,6 @@ #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) #define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) -/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */ -#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm)) -#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm)) -#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1) - /* * kvm_mmmu_cache_min_pages() is the number of pages required to install * a stage-2 translation. 
We pre-allocate the entry level page table at @@ -42,12 +30,4 @@ */ #define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1) -static inline phys_addr_t -stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) -{ - phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm); - - return (boundary - 1 < end - 1) ? boundary : end; -} - #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 94108e2e09..05d3f772a9 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -250,6 +250,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + case PSR_AA32_MODE_SYS: if (!vcpu_el1_is_32bit(vcpu)) return -EINVAL; break; @@ -270,7 +271,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { int i, nr_reg; - switch (*vcpu_cpsr(vcpu)) { + switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) { /* * Either we are dealing with user mode, and only the * first 15 registers (+ PC) must be narrowed to 32bit. diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 38a8095744..db667b4ad1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -31,6 +31,13 @@ static phys_addr_t hyp_idmap_vector; static unsigned long io_map_base; +static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL); + phys_addr_t boundary = ALIGN_DOWN(addr + size, size); + + return (boundary - 1 < end - 1) ? boundary : end; +} /* * Release kvm_mmu_lock periodically if the memory region is large. 
Otherwise, @@ -52,7 +59,7 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr, if (!pgt) return -EINVAL; - next = stage2_pgd_addr_end(kvm, addr, end); + next = stage2_range_addr_end(addr, end); ret = fn(pgt, addr, next - addr); if (ret) break; diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 7740995de9..e80b638b78 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -284,16 +284,12 @@ int kvm_register_vgic_device(unsigned long type) int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, struct vgic_reg_attr *reg_attr) { - int cpuid; + int cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; - - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) - return -EINVAL; - - reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); + if (!reg_attr->vcpu) + return -EINVAL; return 0; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index a3bacd7950..f0779d0f34 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -211,9 +211,6 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); - if (!can_set_direct_map()) - return true; - pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 57465bff1f..df7f349c8d 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -64,6 +64,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 89869aff8c..8902d15178 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ 
-63,6 +63,7 @@ config IA64 select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select SET_FS select ZONE_DMA32 + select FUNCTION_ALIGNMENT_32B default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 7e548c654a..43cde968da 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp EXTRA := cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ - -falign-functions=32 -frename-registers -fno-optimize-sibling-calls + -frename-registers -fno-optimize-sibling-calls KBUILD_CFLAGS_KERNEL := -mconstant-gp GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 9f3663faca..198c4c919c 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -430,7 +430,9 @@ resume: movec %a0,%dfc /* restore status register */ - movew %a1@(TASK_THREAD+THREAD_SR),%sr + movew %a1@(TASK_THREAD+THREAD_SR),%d0 + oriw #0x0700,%d0 + movew %d0,%sr rts diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 4fab347917..060394b000 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -451,30 +451,18 @@ void mac_poweroff(void) void mac_reset(void) { - if (macintosh_config->adb_type == MAC_ADB_II && - macintosh_config->ident != MAC_MODEL_SE30) { - /* need ROMBASE in booter */ - /* indeed, plus need to MAP THE ROM !! 
*/ - - if (mac_bi_data.rombase == 0) - mac_bi_data.rombase = 0x40800000; - - /* works on some */ - rom_reset = (void *) (mac_bi_data.rombase + 0xa); - - local_irq_disable(); - rom_reset(); #ifdef CONFIG_ADB_CUDA - } else if (macintosh_config->adb_type == MAC_ADB_EGRET || - macintosh_config->adb_type == MAC_ADB_CUDA) { + if (macintosh_config->adb_type == MAC_ADB_EGRET || + macintosh_config->adb_type == MAC_ADB_CUDA) { cuda_restart(); + } else #endif #ifdef CONFIG_ADB_PMU - } else if (macintosh_config->adb_type == MAC_ADB_PB2) { + if (macintosh_config->adb_type == MAC_ADB_PB2) { pmu_restart(); + } else #endif - } else if (CPU_IS_030) { - + if (CPU_IS_030) { /* 030-specific reset routine. The idea is general, but the * specific registers to reset are '030-specific. Until I * have a non-030 machine, I can't test anything else. @@ -522,6 +510,18 @@ void mac_reset(void) "jmp %/a0@\n\t" /* jump to the reset vector */ ".chip 68k" : : "r" (offset), "a" (rombase) : "a0"); + } else { + /* need ROMBASE in booter */ + /* indeed, plus need to MAP THE ROM !! 
*/ + + if (mac_bi_data.rombase == 0) + mac_bi_data.rombase = 0x40800000; + + /* works on some */ + rom_reset = (void *)(mac_bi_data.rombase + 0xa); + + local_irq_disable(); + rom_reset(); } /* should never get here */ diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index 15a20eb814..46dcc3b6a0 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -7,7 +7,6 @@ ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code and low level code CFLAGS_REMOVE_timer.o = -pg CFLAGS_REMOVE_intc.o = -pg -CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_process.o = -pg endif diff --git a/arch/microblaze/kernel/cpu/cpuinfo-static.c b/arch/microblaze/kernel/cpu/cpuinfo-static.c index 85dbda4a08..03da36dc6d 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo-static.c +++ b/arch/microblaze/kernel/cpu/cpuinfo-static.c @@ -18,7 +18,7 @@ static const char family_string[] = CONFIG_XILINX_MICROBLAZE0_FAMILY; static const char cpu_ver_string[] = CONFIG_XILINX_MICROBLAZE0_HW_VER; #define err_printk(x) \ - early_printk("ERROR: Microblaze " x "-different for kernel and DTS\n"); + pr_err("ERROR: Microblaze " x "-different for kernel and DTS\n"); void __init set_cpuinfo_static(struct cpuinfo *ci, struct device_node *cpu) { diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index b3e4dd6be7..428b9f1cf1 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -157,7 +157,7 @@ static inline long regs_return_value(struct pt_regs *regs) #define instruction_pointer(regs) ((regs)->cp0_epc) #define profile_pc(regs) instruction_pointer(regs) -extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall); +extern asmlinkage long syscall_trace_enter(struct pt_regs *regs); extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); extern void die(const char *, struct pt_regs *) __noreturn; diff --git a/arch/mips/kernel/asm-offsets.c 
b/arch/mips/kernel/asm-offsets.c index 04ca75278f..6cd0246aa2 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -98,6 +98,7 @@ void output_thread_info_defines(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE_COUNT, thread_info, preempt_count); OFFSET(TI_REGS, thread_info, regs); + OFFSET(TI_SYSCALL, thread_info, syscall); DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index db7c5be1d4..dd454b429f 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1310,16 +1310,13 @@ long arch_ptrace(struct task_struct *child, long request, * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ -asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) +asmlinkage long syscall_trace_enter(struct pt_regs *regs) { user_exit(); - current_thread_info()->syscall = syscall; - if (test_thread_flag(TIF_SYSCALL_TRACE)) { if (tracehook_report_syscall_entry(regs)) return -1; - syscall = current_thread_info()->syscall; } #ifdef CONFIG_SECCOMP @@ -1328,7 +1325,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) struct seccomp_data sd; unsigned long args[6]; - sd.nr = syscall; + sd.nr = current_thread_info()->syscall; sd.arch = syscall_get_arch(current); syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) @@ -1338,23 +1335,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) ret = __secure_computing(&sd); if (ret == -1) return ret; - syscall = current_thread_info()->syscall; } #endif if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[2]); - audit_syscall_entry(syscall, regs->regs[4], regs->regs[5], + audit_syscall_entry(current_thread_info()->syscall, + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); /* * Negative syscall numbers are mistaken 
for rejected syscalls, but * won't have had the return value set appropriately, so we do so now. */ - if (syscall < 0) + if (current_thread_info()->syscall < 0) syscall_set_return_value(current, regs, -ENOSYS, 0); - return syscall; + return current_thread_info()->syscall; } /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 9bfce5f75f..6c14160cd8 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -78,6 +78,18 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + */ + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + lw t0, TI_FLAGS($28) # syscall tracing enabled? li t1, _TIF_WORK_SYSCALL_ENTRY and t0, t1 @@ -115,16 +127,7 @@ syscall_trace_entry: SAVE_STATIC move a0, sp - /* - * syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - */ - move a1, v0 - subu t2, v0, __NR_O32_Linux - bnez t2, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 97456b2ca7..9778885923 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -72,7 +74,6 @@ syscall_common: n32_syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? 
Skip syscall diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index 5f6ed4b4c3..db58115385 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -47,6 +47,8 @@ NESTED(handle_sys64, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -83,7 +85,6 @@ n64_syscall_exit: syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d3c2616cba..7a5abb73e5 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -79,6 +79,22 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * absolute syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + * note: NR_syscall is the first O32 syscall but the macro is + * only defined when compiling with -mabi=32 (CONFIG_32BIT) + * therefore __NR_O32_Linux is used (4000) + */ + + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? 
and t0, t1, t0 @@ -113,22 +129,7 @@ trace_a_syscall: sd a7, PT_R11(sp) # For indirect syscalls move a0, sp - /* - * absolute syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - * note: NR_syscall is the first O32 syscall but the macro is - * only defined when compiling with -mabi=32 (CONFIG_32BIT) - * therefore __NR_O32_Linux is used (4000) - */ - .set push - .set reorder - subu t1, v0, __NR_O32_Linux - move a1, v0 - bnez t1, 1f /* __NR_syscall at offset 0 */ - ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ - .set pop - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index a82b2caaa5..b3edbb33b6 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -74,10 +74,10 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size) * We need to iterate through the pages, clearing the dcache for * them and setting the cache-inhibit bit. 
*/ - mmap_read_lock(&init_mm); - error = walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops, - NULL); - mmap_read_unlock(&init_mm); + mmap_write_lock(&init_mm); + error = walk_page_range_novma(&init_mm, va, va + size, + &set_nocache_walk_ops, NULL, NULL); + mmap_write_unlock(&init_mm); if (error) return ERR_PTR(error); @@ -88,11 +88,11 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) { unsigned long va = (unsigned long)cpu_addr; - mmap_read_lock(&init_mm); + mmap_write_lock(&init_mm); /* walk_page_range shouldn't be able to fail here */ - WARN_ON(walk_page_range(&init_mm, va, va + size, - &clear_nocache_walk_ops, NULL)); - mmap_read_unlock(&init_mm); + WARN_ON(walk_page_range_novma(&init_mm, va, va + size, + &clear_nocache_walk_ops, NULL, NULL)); + mmap_write_unlock(&init_mm); } void arch_sync_dma_for_device(phys_addr_t addr, size_t size, diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index fd8c1ebd27..12b9236eff 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -79,26 +79,28 @@ * version takes two arguments: a src and destination register. * However, the source and destination registers can not be * the same register. + * + * We use add,l to avoid clobbering the C/B bits in the PSW. 
*/ .macro tophys grvirt, grphys - ldil L%(__PAGE_OFFSET), \grphys - sub \grvirt, \grphys, \grphys + ldil L%(-__PAGE_OFFSET), \grphys + addl \grvirt, \grphys, \grphys .endm - + .macro tovirt grphys, grvirt ldil L%(__PAGE_OFFSET), \grvirt - add \grphys, \grvirt, \grvirt + addl \grphys, \grvirt, \grvirt .endm .macro tophys_r1 gr - ldil L%(__PAGE_OFFSET), %r1 - sub \gr, %r1, \gr + ldil L%(-__PAGE_OFFSET), %r1 + addl \gr, %r1, \gr .endm - + .macro tovirt_r1 gr ldil L%(__PAGE_OFFSET), %r1 - add \gr, %r1, \gr + addl \gr, %r1, \gr .endm .macro delay value diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index 3c43baca7b..2aceebcd69 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -40,7 +40,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) " addc %0, %5, %0\n" " addc %0, %3, %0\n" "1: ldws,ma 4(%1), %3\n" -" addib,< 0, %2, 1b\n" +" addib,> -1, %2, 1b\n" " addc %0, %3, %0\n" "\n" " extru %0, 31, 16, %4\n" @@ -126,6 +126,7 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, ** Try to keep 4 registers with "live" values ahead of the ALU. 
*/ +" depdi 0, 31, 32, %0\n"/* clear upper half of incoming checksum */ " ldd,ma 8(%1), %4\n" /* get 1st saddr word */ " ldd,ma 8(%2), %5\n" /* get 1st daddr word */ " add %4, %0, %0\n" @@ -137,8 +138,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */ " extrd,u %0, 31, 32, %4\n"/* copy upper half down */ " depdi 0, 31, 32, %0\n"/* clear upper half */ -" add %4, %0, %0\n" /* fold into 32-bits */ -" addc 0, %0, %0\n" /* add carry */ +" add,dc %4, %0, %0\n" /* fold into 32-bits, plus carry */ +" addc 0, %0, %0\n" /* add final carry */ #else @@ -163,7 +164,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " ldw,ma 4(%2), %7\n" /* 4th daddr */ " addc %6, %0, %0\n" " addc %7, %0, %0\n" -" addc %3, %0, %0\n" /* fold in proto+len, catch carry */ +" addc %3, %0, %0\n" /* fold in proto+len */ +" addc 0, %0, %0\n" /* add carry */ #endif : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len), diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 00297e8e1c..317508493b 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -21,6 +21,7 @@ EXPORT_SYMBOL(memset); #include EXPORT_SYMBOL(__xchg8); EXPORT_SYMBOL(__xchg32); +EXPORT_SYMBOL(__cmpxchg_u8); EXPORT_SYMBOL(__cmpxchg_u32); EXPORT_SYMBOL(__cmpxchg_u64); #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 9bcf345cb2..c25f160bb9 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -513,7 +513,7 @@ struct hvcall_mpp_data { unsigned long backing_mem; }; -int h_get_mpp(struct hvcall_mpp_data *); +long h_get_mpp(struct hvcall_mpp_data *mpp_data); struct hvcall_mpp_x_data { unsigned long coalesced_bytes; diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h index a21f529c43..8359c06d92 100644 --- 
a/arch/powerpc/include/asm/reg_fsl_emb.h +++ b/arch/powerpc/include/asm/reg_fsl_emb.h @@ -12,9 +12,16 @@ #ifndef __ASSEMBLY__ /* Performance Monitor Registers */ #define mfpmr(rn) ({unsigned int rval; \ - asm volatile("mfpmr %0," __stringify(rn) \ + asm volatile(".machine push; " \ + ".machine e300; " \ + "mfpmr %0," __stringify(rn) ";" \ + ".machine pop; " \ : "=r" (rval)); rval;}) -#define mtpmr(rn, v) asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v)) +#define mtpmr(rn, v) asm volatile(".machine push; " \ + ".machine e300; " \ + "mtpmr " __stringify(rn) ",%0; " \ + ".machine pop; " \ + : : "r" (v)) #endif /* __ASSEMBLY__ */ /* Freescale Book E Performance Monitor APU Registers */ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 6448de85f7..2a4f3d09a0 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -67,6 +67,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) +CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 6c196b9413..c2fff9a339 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1886,10 +1886,10 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) * h_get_mpp * H_GET_MPP hcall returns info in 7 parms */ -int h_get_mpp(struct hvcall_mpp_data *mpp_data) +long h_get_mpp(struct hvcall_mpp_data *mpp_data) { - int rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; rc = plpar_hcall9(H_GET_MPP, retbuf); diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index a291b5a94d..fea4dfa54e 100644 --- 
a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -112,8 +112,8 @@ struct hvcall_ppp_data { */ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) { - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; rc = plpar_hcall9(H_GET_PPP, retbuf); @@ -192,7 +192,7 @@ static void parse_ppp_data(struct seq_file *m) struct hvcall_ppp_data ppp_data; struct device_node *root; const __be32 *perf_level; - int rc; + long rc; rc = h_get_ppp(&ppp_data); if (rc) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index c55ccec0a1..d9d3668293 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -569,10 +569,12 @@ static const struct fsl_msi_feature ipic_msi_feature = { .msiir_offset = 0x38, }; +#ifdef CONFIG_EPAPR_PARAVIRT static const struct fsl_msi_feature vmpic_msi_feature = { .fsl_pic_ip = FSL_PIC_IP_VMPIC, .msiir_offset = 0, }; +#endif static const struct of_device_id fsl_of_msi_ids[] = { { diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 9a3d9b68f2..776528fd64 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -675,8 +675,8 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL -#define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define TASK_SIZE _AC(-1, UL) +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index d4d628af21..0a38801c52 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -467,7 +467,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long 
__kr_err = 0; \ \ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ if (unlikely(__kr_err)) \ @@ -476,7 +476,7 @@ do { \ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 5ca2860cc0..851c967c49 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -231,7 +231,7 @@ ret_from_syscall_rejected: andi t0, t0, _TIF_SYSCALL_WORK bnez t0, handle_syscall_trace_exit -ret_from_exception: +SYM_CODE_START_NOALIGN(ret_from_exception) REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS @@ -245,6 +245,7 @@ ret_from_exception: andi s0, s0, SR_SPP #endif bnez s0, resume_kernel +SYM_CODE_END(ret_from_exception) resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index bda3bc2947..1dea5b26f7 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -24,8 +24,6 @@ #include #include -register unsigned long gp_in_global __asm__("gp"); - #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include unsigned long __stack_chk_guard __read_mostly; @@ -130,7 +128,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 894ae66421..94721c484d 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -18,6 +18,18 @@ register unsigned long sp_in_global __asm__("sp"); #ifdef CONFIG_FRAME_POINTER +extern asmlinkage 
void ret_from_exception(void); + +static inline int fp_is_valid(unsigned long fp, unsigned long sp) +{ + unsigned long low, high; + + low = sp + sizeof(struct stackframe); + high = ALIGN(sp, THREAD_SIZE); + + return !(fp < low || fp > high || fp & 0x07); +} + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { @@ -41,27 +53,32 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } for (;;) { - unsigned long low, high; struct stackframe *frame; if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; - /* Validate frame pointer */ - low = sp + sizeof(struct stackframe); - high = ALIGN(sp, THREAD_SIZE); - if (unlikely(fp < low || fp > high || fp & 0x7)) + if (unlikely(!fp_is_valid(fp, sp))) break; + /* Unwind stack frame */ frame = (struct stackframe *)fp - 1; sp = fp; - if (regs && (regs->epc == pc) && (frame->fp & 0x7)) { + if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) { + /* We hit function where ra is not saved on the stack */ fp = frame->ra; pc = regs->ra; } else { fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, &frame->ra); + if (pc == (unsigned long)ret_from_exception) { + if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + break; + + pc = ((struct pt_regs *)sp)->epc; + fp = ((struct pt_regs *)sp)->s0; + } } } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 1aa11a8f57..05fed61c5e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -27,7 +27,6 @@ int __bootdata(is_full_image) = 1; struct initrd_data __bootdata(initrd_data); u64 __bootdata_preserved(stfle_fac_list[16]); -u64 __bootdata_preserved(alt_stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); void error(char *x) diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 646b12981f..0f6ff2008a 100644 --- a/arch/s390/include/asm/cpacf.h +++ 
b/arch/s390/include/asm/cpacf.h @@ -161,28 +161,86 @@ typedef struct { unsigned char bytes[16]; } cpacf_mask_t; -/** - * cpacf_query() - check if a specific CPACF function is available - * @opcode: the opcode of the crypto instruction - * @func: the function code to test for - * - * Executes the query function for the given crypto instruction @opcode - * and checks if @func is available - * - * Returns 1 if @func is available for @opcode, 0 otherwise +/* + * Prototype for a not existing function to produce a link + * error if __cpacf_query() or __cpacf_check_opcode() is used + * with an invalid compile time const opcode. */ -static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask) +void __cpacf_bad_opcode(void); + +static __always_inline void __cpacf_query_rre(u32 opc, u8 r1, u8 r2, + cpacf_mask_t *mask) { asm volatile( - " lghi 0,0\n" /* query function */ - " lgr 1,%[mask]\n" - " spm 0\n" /* pckmo doesn't change the cc */ - /* Parameter regs are ignored, but must be nonzero and unique */ - "0: .insn rrf,%[opc] << 16,2,4,6,0\n" - " brc 1,0b\n" /* handle partial completion */ - : "=m" (*mask) - : [mask] "d" ((unsigned long)mask), [opc] "i" (opcode) - : "cc", "0", "1"); + " la %%r1,%[mask]\n" + " xgr %%r0,%%r0\n" + " .insn rre,%[opc] << 16,%[r1],%[r2]\n" + : [mask] "=R" (*mask) + : [opc] "i" (opc), + [r1] "i" (r1), [r2] "i" (r2) + : "cc", "r0", "r1"); +} + +static __always_inline void __cpacf_query_rrf(u32 opc, + u8 r1, u8 r2, u8 r3, u8 m4, + cpacf_mask_t *mask) +{ + asm volatile( + " la %%r1,%[mask]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]\n" + : [mask] "=R" (*mask) + : [opc] "i" (opc), [r1] "i" (r1), [r2] "i" (r2), + [r3] "i" (r3), [m4] "i" (m4) + : "cc", "r0", "r1"); +} + +static __always_inline void __cpacf_query(unsigned int opcode, + cpacf_mask_t *mask) +{ + switch (opcode) { + case CPACF_KDSA: + __cpacf_query_rre(CPACF_KDSA, 0, 2, mask); + break; + case CPACF_KIMD: + __cpacf_query_rre(CPACF_KIMD, 0, 
2, mask); + break; + case CPACF_KLMD: + __cpacf_query_rre(CPACF_KLMD, 0, 2, mask); + break; + case CPACF_KM: + __cpacf_query_rre(CPACF_KM, 2, 4, mask); + break; + case CPACF_KMA: + __cpacf_query_rrf(CPACF_KMA, 2, 4, 6, 0, mask); + break; + case CPACF_KMAC: + __cpacf_query_rre(CPACF_KMAC, 0, 2, mask); + break; + case CPACF_KMC: + __cpacf_query_rre(CPACF_KMC, 2, 4, mask); + break; + case CPACF_KMCTR: + __cpacf_query_rrf(CPACF_KMCTR, 2, 4, 6, 0, mask); + break; + case CPACF_KMF: + __cpacf_query_rre(CPACF_KMF, 2, 4, mask); + break; + case CPACF_KMO: + __cpacf_query_rre(CPACF_KMO, 2, 4, mask); + break; + case CPACF_PCC: + __cpacf_query_rre(CPACF_PCC, 0, 0, mask); + break; + case CPACF_PCKMO: + __cpacf_query_rre(CPACF_PCKMO, 0, 0, mask); + break; + case CPACF_PRNO: + __cpacf_query_rre(CPACF_PRNO, 2, 4, mask); + break; + default: + __cpacf_bad_opcode(); + } } static __always_inline int __cpacf_check_opcode(unsigned int opcode) @@ -206,10 +264,21 @@ static __always_inline int __cpacf_check_opcode(unsigned int opcode) case CPACF_KMA: return test_facility(146); /* check for MSA8 */ default: - BUG(); + __cpacf_bad_opcode(); + return 0; } } +/** + * cpacf_query() - check if a specific CPACF function is available + * @opcode: the opcode of the crypto instruction + * @func: the function code to test for + * + * Executes the query function for the given crypto instruction @opcode + * and checks if @func is available + * + * Returns 1 if @func is available for @opcode, 0 otherwise + */ static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) { if (__cpacf_check_opcode(opcode)) { diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h index 4f21ae561e..390906b8e3 100644 --- a/arch/s390/include/asm/dwarf.h +++ b/arch/s390/include/asm/dwarf.h @@ -9,6 +9,7 @@ #define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset #define CFI_RESTORE .cfi_restore +#define CFI_REL_OFFSET .cfi_rel_offset #ifdef 
CONFIG_AS_CFI_VAL_OFFSET #define CFI_VAL_OFFSET .cfi_val_offset diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d530eb4dc4..74ef903f94 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -685,6 +685,7 @@ ENDPROC(stack_overflow) .Lthis_cpu: .short 0 .Lstosm_tmp: .byte 0 .section .rodata, "a" + .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame .globl sys_call_table sys_call_table: diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 834b1ec5dd..d2ba82873a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -833,8 +833,8 @@ static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj, scpdata_len += padding; } - reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len; - reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len; + reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN + scpdata_len; + reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN + scpdata_len; reipl_block_nvme->nvme.scp_data_len = scpdata_len; return count; @@ -1603,9 +1603,9 @@ static int __init dump_nvme_init(void) } dump_block_nvme->hdr.len = IPL_BP_NVME_LEN; dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; - dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN; - dump_block_nvme->fcp.pbt = IPL_PBT_NVME; - dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP; + dump_block_nvme->nvme.len = IPL_BP0_NVME_LEN; + dump_block_nvme->nvme.pbt = IPL_PBT_NVME; + dump_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_DUMP; dump_capabilities |= DUMP_TYPE_NVME; return 0; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b7ce6c7c84..50cb4c3d36 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -154,7 +154,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); -u64 __bootdata_preserved(alt_stfle_fac_list[16]); +u64 alt_stfle_fac_list[16]; struct oldmem_data 
__bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index cc513add48..6056f2ae02 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -20,7 +20,10 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) KBUILD_AFLAGS_32 += -m31 -s KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \ --hash-style=both --build-id=sha1 -melf_s390 -T diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 42d918d50a..498d56757c 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -25,7 +25,11 @@ KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin +KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64)) +KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64)) +KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64)) +KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64)) +KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables ldflags-y := -shared -soname=linux-vdso64.so.1 \ --hash-style=both --build-id=sha1 -T diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index 97f0c0a669..0625381359 100644 --- 
a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -23,8 +23,10 @@ __kernel_\func: CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE) CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD stg %r14,STACK_FRAME_OVERHEAD(%r15) + CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD brasl %r14,__s390_vdso_\func lg %r14,STACK_FRAME_OVERHEAD(%r15) + CFI_RESTORE 14 aghi %r15,WRAPPER_FRAME_SIZE CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index a2c872de29..32d9db5e6f 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2632,7 +2632,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, return 0; start = pmd_val(*pmd) & HPAGE_MASK; - end = start + HPAGE_SIZE - 1; + end = start + HPAGE_SIZE; __storage_key_init_range(start, end); set_bit(PG_arch_1, &page->flags); cond_resched(); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index da36d13ffc..8631307d3d 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -146,7 +146,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) } if (!test_and_set_bit(PG_arch_1, &page->flags)) - __storage_key_init_range(paddr, paddr + size - 1); + __storage_key_init_range(paddr, paddr + size); } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 1a374d021e..88020b4ddb 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1229,8 +1229,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \ (insn->imm & BPF_FETCH) ? 
src_reg : REG_W0, \ src_reg, dst_reg, off); \ - if (is32 && (insn->imm & BPF_FETCH)) \ - EMIT_ZERO(src_reg); \ + if (insn->imm & BPF_FETCH) { \ + /* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \ + _EMIT2(0x07e0); \ + if (is32) \ + EMIT_ZERO(src_reg); \ + } \ } while (0) case BPF_ADD: case BPF_ADD | BPF_FETCH: diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index 1c7f358ef0..5db45517bb 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -44,17 +44,12 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (OPCODE_RTE(opcode)) return -EFAULT; /* Bad breakpoint */ + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = opcode; return 0; } -void __kprobes arch_copy_kprobe(struct kprobe *p) -{ - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; -} - void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S index 3e07074e00..06fed5a21e 100644 --- a/arch/sh/lib/checksum.S +++ b/arch/sh/lib/checksum.S @@ -33,7 +33,8 @@ */ /* - * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); + * unsigned int csum_partial(const unsigned char *buf, int len, + * unsigned int sum); */ .text @@ -45,31 +46,11 @@ ENTRY(csum_partial) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ + mov r5, r1 mov r4, r0 - tst #3, r0 ! Check alignment. - bt/s 2f ! Jump if alignment is ok. - mov r4, r7 ! Keep a copy to check for alignment + tst #2, r0 ! Check alignment. + bt 2f ! Jump if alignment is ok. ! - tst #1, r0 ! Check alignment. - bt 21f ! Jump if alignment is boundary of 2bytes. - - ! buf is odd - tst r5, r5 - add #-1, r5 - bt 9f - mov.b @r4+, r0 - extu.b r0, r0 - addc r0, r6 ! t=0 from previous tst - mov r6, r0 - shll8 r6 - shlr16 r0 - shlr8 r0 - or r0, r6 - mov r4, r0 - tst #2, r0 - bt 2f -21: - ! 
buf is 2 byte aligned (len could be 0) add #-2, r5 ! Alignment uses up two bytes. cmp/pz r5 ! bt/s 1f ! Jump if we had at least two bytes. @@ -77,17 +58,16 @@ ENTRY(csum_partial) bra 6f add #2, r5 ! r5 was < 2. Deal with it. 1: + mov r5, r1 ! Save new len for later use. mov.w @r4+, r0 extu.w r0, r0 addc r0, r6 bf 2f add #1, r6 2: - ! buf is 4 byte aligned (len could be 0) - mov r5, r1 mov #-5, r0 - shld r0, r1 - tst r1, r1 + shld r0, r5 + tst r5, r5 bt/s 4f ! if it's =0, go to 4f clrt .align 2 @@ -109,31 +89,30 @@ ENTRY(csum_partial) addc r0, r6 addc r2, r6 movt r0 - dt r1 + dt r5 bf/s 3b cmp/eq #1, r0 - ! here, we know r1==0 - addc r1, r6 ! add carry to r6 + ! here, we know r5==0 + addc r5, r6 ! add carry to r6 4: - mov r5, r0 + mov r1, r0 and #0x1c, r0 tst r0, r0 - bt 6f - ! 4 bytes or more remaining - mov r0, r1 - shlr2 r1 + bt/s 6f + mov r0, r5 + shlr2 r5 mov #0, r2 5: addc r2, r6 mov.l @r4+, r2 movt r0 - dt r1 + dt r5 bf/s 5b cmp/eq #1, r0 addc r2, r6 - addc r1, r6 ! r1==0 here, so it means add carry-bit + addc r5, r6 ! r5==0 here, so it means add carry-bit 6: - ! 3 bytes or less remaining + mov r1, r5 mov #3, r0 and r0, r5 tst r5, r5 @@ -159,16 +138,6 @@ ENTRY(csum_partial) mov #0, r0 addc r0, r6 9: - ! 
Check if the buffer was misaligned, if so realign sum - mov r7, r0 - tst #1, r0 - bt 10f - mov r6, r0 - shll8 r6 - shlr16 r0 - shlr8 r0 - or r0, r6 -10: rts mov r6, r0 diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index e75783b6ab..16ab904616 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -47,7 +47,6 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask); int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) -void smp_fill_in_cpu_possible_map(void); void smp_fill_in_sib_core_maps(void); void cpu_play_dead(void); @@ -77,7 +76,6 @@ void __cpu_die(unsigned int cpu); #define smp_fill_in_sib_core_maps() do { } while (0) #define smp_fetch_global_regs() do { } while (0) #define smp_fetch_global_pmu() do { } while (0) -#define smp_fill_in_cpu_possible_map() do { } while (0) #define smp_init_cpu_poke() do { } while (0) #define scheduler_poke() do { } while (0) diff --git a/arch/sparc/include/uapi/asm/termbits.h b/arch/sparc/include/uapi/asm/termbits.h index ce5ad5d0f1..0614e179bc 100644 --- a/arch/sparc/include/uapi/asm/termbits.h +++ b/arch/sparc/include/uapi/asm/termbits.h @@ -13,16 +13,6 @@ typedef unsigned int tcflag_t; typedef unsigned long tcflag_t; #endif -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - #define NCCS 17 struct termios { tcflag_t c_iflag; /* input mode flags */ diff --git a/arch/sparc/include/uapi/asm/termios.h b/arch/sparc/include/uapi/asm/termios.h index ee86f4093d..cceb322608 100644 --- a/arch/sparc/include/uapi/asm/termios.h +++ b/arch/sparc/include/uapi/asm/termios.h @@ -40,5 +40,14 @@ struct winsize { unsigned short ws_ypixel; }; +#define NCC 
8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; #endif /* _UAPI_SPARC_TERMIOS_H */ diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a3..fbf25e926f 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -274,7 +274,7 @@ static int __init setup_nmi_watchdog(char *str) if (!strncmp(str, "panic", 5)) panic_on_timeout = 1; - return 0; + return 1; } __setup("nmi_watchdog=", setup_nmi_watchdog); diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index f883a50fa3..4eae633f71 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -483,7 +483,9 @@ static void *record_one_cpu(struct device_node *dp, int cpuid, int arg) ncpus_probed++; #ifdef CONFIG_SMP set_cpu_present(cpuid, true); - set_cpu_possible(cpuid, true); + + if (num_possible_cpus() < nr_cpu_ids) + set_cpu_possible(cpuid, true); #endif return NULL; } diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 48abee4eee..9e6e7f983d 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -684,7 +684,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); init_sparc64_elf_hwcap(); - smp_fill_in_cpu_possible_map(); /* * Once the OF device tree and MDESC have been setup and nr_cpus has * been parsed, we know the list of possible cpus. 
Therefore we can diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 0224d8f19e..1414efd497 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1210,20 +1210,6 @@ void __init smp_setup_processor_id(void) xcall_deliver_impl = hypervisor_xcall_deliver; } -void __init smp_fill_in_cpu_possible_map(void) -{ - int possible_cpus = num_possible_cpus(); - int i; - - if (possible_cpus > nr_cpu_ids) - possible_cpus = nr_cpu_ids; - - for (i = 0; i < possible_cpus; i++) - set_cpu_possible(i, true); - for (; i < NR_CPUS; i++) - set_cpu_possible(i, false); -} - void smp_fill_in_sib_core_maps(void) { unsigned int i; diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index cc19e09b0f..b073153c71 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -449,9 +449,8 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); - if (err) - return err; - vdso_enabled = val; - return 0; + if (!err) + vdso_enabled = val; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 02b0befd67..95ad6b190d 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -673,24 +673,26 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port, goto cleanup; } - *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), - .fd = fd, + *winch = ((struct winch) { .fd = fd, .tty_fd = tty_fd, .pid = pid, .port = port, .stack = stack }); + spin_lock(&winch_handler_lock); + list_add(&winch->list, &winch_handlers); + spin_unlock(&winch_handler_lock); + if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, IRQF_SHARED, "winch", winch) < 0) { printk(KERN_ERR "register_winch_irq - failed to register " "IRQ\n"); + spin_lock(&winch_handler_lock); + list_del(&winch->list); + spin_unlock(&winch_handler_lock); goto out_free; } - spin_lock(&winch_handler_lock); - list_add(&winch->list, &winch_handlers); - 
spin_unlock(&winch_handler_lock); - return; out_free: diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index fefd343412..b3a4cc5a20 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1097,7 +1097,7 @@ static int __init ubd_init(void) if (irq_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, sizeof(struct io_thread_req *), @@ -1108,7 +1108,7 @@ static int __init ubd_init(void) if (io_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } platform_driver_register(&ubd_driver); mutex_lock(&ubd_lock); diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 45a4bcd27a..310fb14a85 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -141,7 +141,7 @@ static bool get_bpf_flash(struct arglist *def) if (allow != NULL) { if (kstrtoul(allow, 10, &result) == 0) - return (allow > 0); + return result > 0; } return false; } diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h index 5b072aba5b..a7cb380c0b 100644 --- a/arch/um/include/asm/mmu.h +++ b/arch/um/include/asm/mmu.h @@ -15,8 +15,6 @@ typedef struct mm_context { struct page *stub_pages[2]; } mm_context_t; -extern void __switch_mm(struct mm_id * mm_idp); - /* Avoid tangled inclusion with asm/ldt.h */ extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm); extern void free_ldt(struct mm_context *mm); diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index e82e203f5f..92dbf727e3 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -15,4 +15,6 @@ struct mm_id { int kill; }; +void __switch_mm(struct mm_id *mm_idp); + #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cfb1edd254..2f6312e7ce 100644 --- a/arch/x86/Kconfig +++ 
b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_CONFIGURES_CPU_MITIGATIONS select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM) select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG @@ -267,6 +268,8 @@ config X86 select HAVE_ARCH_KCSAN if X86_64 select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS + select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16 + select FUNCTION_ALIGNMENT_4B imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI config INSTRUCTION_DECODER @@ -2392,17 +2395,17 @@ config CC_HAS_SLS config CC_HAS_RETURN_THUNK def_bool $(cc-option,-mfunction-return=thunk-extern) -menuconfig SPECULATION_MITIGATIONS - bool "Mitigations for speculative execution vulnerabilities" +menuconfig CPU_MITIGATIONS + bool "Mitigations for CPU vulnerabilities" default y help - Say Y here to enable options which enable mitigations for - speculative execution hardware vulnerabilities. + Say Y here to enable options which enable mitigations for hardware + vulnerabilities (usually related to speculative execution). If you say N, all mitigations will be disabled. You really should know what you are doing to say so. -if SPECULATION_MITIGATIONS +if CPU_MITIGATIONS config PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" @@ -2492,6 +2495,27 @@ config GDS_FORCE_MITIGATION If in doubt, say N. +config MITIGATION_RFDS + bool "RFDS Mitigation" + depends on CPU_SUP_INTEL + default y + help + Enable mitigation for Register File Data Sampling (RFDS) by default. + RFDS is a hardware vulnerability which affects Intel Atom CPUs. It + allows unprivileged speculative access to stale data previously + stored in floating point, vector and integer registers. 
+ See also + +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" + depends on CPU_SUP_INTEL + default y + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d4d6db4dde..5d72d52fbc 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -249,6 +249,7 @@ config UNWINDER_ORC config UNWINDER_FRAME_POINTER bool "Frame pointer unwinder" + select ARCH_WANT_FRAME_POINTERS select FRAME_POINTER help This option enables the frame pointer unwinder for unwinding kernel @@ -272,7 +273,3 @@ config UNWINDER_GUESS overhead. endchoice - -config FRAME_POINTER - depends on !UNWINDER_ORC && !UNWINDER_GUESS - bool diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c3d427c817..e189a16ae4 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -37,6 +37,14 @@ #include #include "pgtable.h" +/* + * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result + * in assembly errors due to trying to move .org backward due to the excessive + * alignment. 
+ */ +#undef __ALIGN +#define __ALIGN .balign 16, 0x90 + /* * Locally defined symbols should be marked hidden: */ diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index 6a0b15e719..54c0ee4120 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -153,5 +153,6 @@ SYM_FUNC_START(nh_avx2) vpaddq T1, T0, T0 vpaddq T4, T0, T0 vmovdqu T0, (HASH) + vzeroupper RET SYM_FUNC_END(nh_avx2) diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9bcdbc47b8..f7d7287768 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -710,6 +710,7 @@ done_hash: popq %r13 popq %r12 popq %rbx + vzeroupper RET SYM_FUNC_END(sha256_transform_rorx) diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 5cdaab7d69..1c4e5d88e1 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -679,6 +679,7 @@ done_hash: pop %r12 pop %rbx + vzeroupper RET SYM_FUNC_END(sha512_transform_rorx) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6c2826417b..e160f502d1 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -47,7 +47,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -64,7 +64,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -109,7 +109,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, 
IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index bfb7bcb362..09e99d13fc 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e309e71560..ee5def1060 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -981,6 +982,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. 
*/ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9f1333a9ee..10d6888713 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -114,6 +114,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -219,6 +220,7 @@ syscall_return_via_sysret: popq %rdi popq %rsp swapgs + CLEAR_CPU_BUFFERS sysretq SYM_CODE_END(entry_SYSCALL_64) @@ -637,6 +639,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi SWAPGS + CLEAR_CPU_BUFFERS INTERRUPT_RETURN @@ -743,6 +746,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1465,6 +1470,12 @@ nmi_restore: std movq $0, 5*8(%rsp) /* clear "NMI executing" */ + /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this @@ -1482,6 +1493,7 @@ SYM_CODE_END(asm_exc_nmi) SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(ignore_sysret) #endif @@ -1499,3 +1511,63 @@ SYM_CODE_START(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. 
+ * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4d637a965e..d03f0cfbcb 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -259,6 +260,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -319,6 +321,7 @@ sysret32_from_system_call: xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl 
SYM_CODE_END(entry_SYSCALL_compat) @@ -421,6 +424,7 @@ SYM_CODE_START(entry_INT80_compat) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_int80_syscall_32 diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e..c2235bae17 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1f..33b3f09e6f 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. 
+ */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a12..03de4a9321 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index fd2ee9408e..ba3172d5b3 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) static bool write_ok_or_segv(unsigned long ptr, size_t size) { - /* - * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_err, this could go away. - */ - if (!access_ok((void __user *)ptr, size)) { struct thread_struct *thread = ¤t->thread; @@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) bool emulate_vsyscall(unsigned long error_code, struct pt_regs *regs, unsigned long address) { - struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_err; long ret; unsigned long orig_dx; @@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code, goto sigsegv; } - tsk = current; - /* * Check for access_ok violations and find the syscall nr. 
* @@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code, goto do_ret; /* skip requested */ /* - * With a real vsyscall, page faults cause SIGSEGV. We want to - * preserve that behavior to make writing exploits harder. + * With a real vsyscall, page faults cause SIGSEGV. */ - prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; - current->thread.sig_on_uaccess_err = 1; - ret = -EFAULT; switch (vsyscall_nr) { case 0: @@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code, break; } - current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; - check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); - - /* - * If we failed to generate a signal for any reason, - * generate one here. (This should be impossible.) - */ - if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && - !sigismember(&tsk->pending.signal, SIGSEGV))) - goto sigsegv; - - return true; /* Don't emulate the ret. 
*/ + goto sigsegv; } regs->ax = ret; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 81d5e0a1f4..e55fc25da2 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1649,6 +1649,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 48067af946..d2db8f4fa1 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -12,6 +12,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -111,7 +112,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } extern void native_apic_wait_icr_idle(void); diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 8f80de627c..5cdccea455 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 6dd47c9ec7..fbcfec4dc4 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -6,11 +6,13 @@ # define __ASM_FORM(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, +# define __ASM_REGPFX % #else #include # define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " # define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) # define __ASM_FORM_COMMA(x, ...) 
" " __stringify(x,##__VA_ARGS__) "," +# define __ASM_REGPFX %% #endif #define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) @@ -49,6 +51,9 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */ +#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip)) + #ifndef __x86_64__ /* 32 bit */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index cc3f62f5d5..955ca6b13e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -93,8 +95,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -118,8 +121,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d6089072ee..18817817ea 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -302,7 +302,7 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ - +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ @@ -416,10 +416,22 @@ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc and Linux defined features. + * + * Reuse free bits when adding new feature flags! 
+ */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ + /* * BUG word(s) */ @@ -466,4 +478,6 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 99a12012c6..b05d815cea 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -109,6 +109,7 @@ #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index a12fdf01dc..f27d6cecd8 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 08cfc26ee7..f779facd82 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -732,6 +732,7 @@ struct 
kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + bool is_amd_compatible; u64 reserved_gpa_bits; int maxphyaddr; diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5000cf59bd..04a333c334 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -11,12 +11,14 @@ #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ -#ifdef __ASSEMBLY__ - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) -#define __ALIGN .p2align 4, 0x90 +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; #define __ALIGN_STR __stringify(__ALIGN) -#endif + +#define ASM_FUNC_ALIGN __ALIGN_STR +#define __FUNC_ALIGN __ALIGN +#define SYM_F_ALIGN __FUNC_ALIGN + +#ifdef __ASSEMBLY__ #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5672ccb80e..15939a71dc 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -30,6 +30,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -38,6 +39,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* Intel MSRs. 
Some also available on other CPUs */ @@ -53,10 +55,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -153,6 +158,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. @@ -166,6 +175,14 @@ * CPU is not vulnerable to Gather * Data Sampling (GDS). */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f3f6c28e58..ed582fa98c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -155,11 +155,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. 
Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -176,12 +185,37 @@ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END - ALTERNATIVE_2 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + CALL_UNTRAIN_RET + ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. 
+ */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF + verw _ASM_RIP(mds_verw_sel) +.Lskip_verw_\@: +.endm + +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm +#else +#define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -207,6 +241,10 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); #ifdef CONFIG_RETPOLINE @@ -357,13 +395,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include /** @@ -389,17 +428,6 @@ static __always_inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } -/** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - /** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bbbf27cfe7..0702e0c5db 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -519,7 +519,6 @@ struct thread_struct { unsigned long iopl_emul; unsigned int iopl_warn:1; - unsigned int 
sig_on_uaccess_err:1; /* * Protection Keys Register for Userspace. Loaded immediately on diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 9bf60a8b9e..1fbe53583e 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -103,6 +103,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index a800abb1a9..d8416b3bf8 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,11 +12,6 @@ /* image of the saved processor state */ struct saved_context { - /* - * On x86_32, all segment registers except gs are saved at kernel - * entry in pt_regs. - */ - u16 gs; unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; struct saved_msrs saved_msrs; @@ -27,6 +22,11 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + /* + * On x86_32, all segment registers except gs are saved at kernel + * entry in pt_regs. + */ + u16 gs; bool misc_enable_saved; } __attribute__((packed)); diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index f7e2d82d24..825528bf0d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include /* for TS_COMPAT */ #include +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). 
*/ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index c6015b4074..7281ce64e9 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -181,6 +181,37 @@ void int3_emulate_ret(struct pt_regs *regs) unsigned long ip = int3_emulate_pop(regs); int3_emulate_jmp(regs, ip); } + +static __always_inline +void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) +{ + static const unsigned long jcc_mask[6] = { + [0] = X86_EFLAGS_OF, + [1] = X86_EFLAGS_CF, + [2] = X86_EFLAGS_ZF, + [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, + [4] = X86_EFLAGS_SF, + [5] = X86_EFLAGS_PF, + }; + + bool invert = cc & 1; + bool match; + + if (cc < 0xc) { + match = regs->flags & jcc_mask[cc >> 1]; + } else { + match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ + ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); + if (cc >= 0xe) + match = match || (regs->flags & X86_EFLAGS_ZF); + } + + if ((match && !invert) || (!match && invert)) + ip += disp; + + int3_emulate_jmp(regs, ip); +} + #endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e5536edbae..5614e6d219 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -351,6 +351,12 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, kasan_enable_current(); } +static inline bool is_jcc32(struct insn *insn) +{ + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ + return insn->opcode.bytes[0] == 0x0f && 
(insn->opcode.bytes[1] & 0xf0) == 0x80; +} + #if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION) /* @@ -1201,6 +1207,11 @@ void text_poke_sync(void) on_each_cpu(do_sync_core, NULL, 1); } +/* + * NOTE: crazy scheme to allow patching Jcc.d32 but not increase the size of + * this thing. When len == 6 everything is prefixed with 0x0f and we map + * opcode to Jcc.d8, using len to distinguish. + */ struct text_poke_loc { /* addr := _stext + rel_addr */ s32 rel_addr; @@ -1322,6 +1333,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs) int3_emulate_jmp(regs, (long)ip + tp->disp); break; + case 0x70 ... 0x7f: /* Jcc */ + int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp); + break; + default: BUG(); } @@ -1395,16 +1410,26 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries * Second step: update all but the first byte of the patched range. */ for (do_sync = 0, i = 0; i < nr_entries; i++) { - u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, }; + u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, }; + u8 _new[POKE_MAX_OPCODE_SIZE+1]; + const u8 *new = tp[i].text; int len = tp[i].len; if (len - INT3_INSN_SIZE > 0) { memcpy(old + INT3_INSN_SIZE, text_poke_addr(&tp[i]) + INT3_INSN_SIZE, len - INT3_INSN_SIZE); + + if (len == 6) { + _new[0] = 0x0f; + memcpy(_new + 1, new, 5); + new = _new; + } + text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE, - (const char *)tp[i].text + INT3_INSN_SIZE, + new + INT3_INSN_SIZE, len - INT3_INSN_SIZE); + do_sync++; } @@ -1432,8 +1457,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries * The old instruction is recorded so that the event can be * processed forwards or backwards. */ - perf_event_text_poke(text_poke_addr(&tp[i]), old, len, - tp[i].text, len); + perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len); } if (do_sync) { @@ -1450,10 +1474,15 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries * replacing opcode. 
*/ for (do_sync = 0, i = 0; i < nr_entries; i++) { - if (tp[i].text[0] == INT3_INSN_OPCODE) + u8 byte = tp[i].text[0]; + + if (tp[i].len == 6) + byte = 0x0f; + + if (byte == INT3_INSN_OPCODE) continue; - text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE); + text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE); do_sync++; } @@ -1471,9 +1500,11 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, const void *opcode, size_t len, const void *emulate) { struct insn insn; - int ret, i; + int ret, i = 0; - memcpy((void *)tp->text, opcode, len); + if (len == 6) + i = 1; + memcpy((void *)tp->text, opcode+i, len-i); if (!emulate) emulate = opcode; @@ -1484,6 +1515,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, tp->len = len; tp->opcode = insn.opcode.bytes[0]; + if (is_jcc32(&insn)) { + /* + * Map Jcc.d32 onto Jcc.d8 and use len to distinguish. + */ + tp->opcode = insn.opcode.bytes[1] - 0x10; + } + switch (tp->opcode) { case RET_INSN_OPCODE: case JMP32_INSN_OPCODE: @@ -1500,7 +1538,6 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, BUG_ON(len != insn.length); }; - switch (tp->opcode) { case INT3_INSN_OPCODE: case RET_INSN_OPCODE: @@ -1509,6 +1546,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, case CALL_INSN_OPCODE: case JMP32_INSN_OPCODE: case JMP8_INSN_OPCODE: + case 0x70 ... 
0x7f: /* Jcc */ tp->disp = insn.immediate.value; break; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c132daabe6..fb539244c5 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -982,7 +982,8 @@ static void __send_cleanup_vector(struct apic_chip_data *apicd) hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); } else { - apicd->prev_vector = 0; + pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu); + free_moved_vector(apicd); } raw_spin_unlock(&vector_lock); } @@ -1019,6 +1020,7 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { + unsigned int cpu = smp_processor_id(); struct apic_chip_data *apicd; struct irq_data *irqd; unsigned int vector; @@ -1043,10 +1045,11 @@ void irq_force_complete_move(struct irq_desc *desc) goto unlock; /* - * If prev_vector is empty, no action required. + * If prev_vector is empty or the descriptor is neither currently + * nor previously on the outgoing CPU no action required. */ vector = apicd->prev_vector; - if (!vector) + if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) goto unlock; /* diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dba7fe7ece..9fb890574f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1021,11 +1021,11 @@ static bool cpu_has_zenbleed_microcode(void) u32 good_rev = 0; switch (boot_cpu_data.x86_model) { - case 0x30 ... 0x3f: good_rev = 0x0830107a; break; - case 0x60 ... 0x67: good_rev = 0x0860010b; break; - case 0x68 ... 0x6f: good_rev = 0x08608105; break; - case 0x70 ... 0x7f: good_rev = 0x08701032; break; - case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; + case 0x30 ... 0x3f: good_rev = 0x0830107b; break; + case 0x60 ... 0x67: good_rev = 0x0860010c; break; + case 0x68 ... 0x6f: good_rev = 0x08608107; break; + case 0x70 ... 
0x7f: good_rev = 0x08701033; break; + case 0xa0 ... 0xaf: good_rev = 0x08a00009; break; default: return false; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d1ba55ea46..247545b57d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,6 +60,8 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init x86_arch_cap_msr; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; @@ -110,9 +112,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -146,6 +145,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + x86_arch_cap_msr = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -258,7 +259,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -310,8 +311,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -350,9 +349,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * 
update is required. */ - ia32_cap = x86_read_arch_cap_msr(); - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -362,7 +360,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -410,8 +408,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -422,15 +418,20 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + /* + * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based + * mitigations, disable KVM-only mitigation in that case. + */ + if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + static_branch_disable(&mmio_stale_data_clear); else static_branch_enable(&mmio_stale_data_clear); @@ -439,7 +440,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. 
*/ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -449,10 +450,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -482,6 +483,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str) } early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Register File Data Sampling: " fmt + +enum rfds_mitigations { + RFDS_MITIGATION_OFF, + RFDS_MITIGATION_VERW, + RFDS_MITIGATION_UCODE_NEEDED, +}; + +/* Default mitigation for Register File Data Sampling */ +static enum rfds_mitigations rfds_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_RFDS) ? 
RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; + +static const char * const rfds_strings[] = { + [RFDS_MITIGATION_OFF] = "Vulnerable", + [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", + [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", +}; + +static void __init rfds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + rfds_mitigation = RFDS_MITIGATION_OFF; + return; + } + if (rfds_mitigation == RFDS_MITIGATION_OFF) + return; + + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; +} + +static __init int rfds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + return 0; + + if (!strcmp(str, "off")) + rfds_mitigation = RFDS_MITIGATION_OFF; + else if (!strcmp(str, "on")) + rfds_mitigation = RFDS_MITIGATION_VERW; + + return 0; +} +early_param("reg_file_data_sampling", rfds_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "" fmt @@ -490,12 +542,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { @@ -507,11 +559,19 @@ static void __init md_clear_update_mitigation(void) taa_mitigation = TAA_MITIGATION_VERW; taa_select_mitigation(); } - if (mmio_mitigation == MMIO_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + /* + * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear + * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state. 
+ */ + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } + if (rfds_mitigation == RFDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_RFDS)) { + rfds_mitigation = RFDS_MITIGATION_VERW; + rfds_select_mitigation(); + } out: if (boot_cpu_has_bug(X86_BUG_MDS)) pr_info("MDS: %s\n", mds_strings[mds_mitigation]); @@ -521,6 +581,8 @@ static void __init md_clear_update_mitigation(void) pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + if (boot_cpu_has_bug(X86_BUG_RFDS)) + pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); } static void __init md_clear_select_mitigation(void) @@ -528,11 +590,12 @@ static void __init md_clear_select_mitigation(void) mds_select_mitigation(); taa_select_mitigation(); mmio_select_mitigation(); + rfds_select_mitigation(); /* - * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update - * and print their mitigation after MDS, TAA and MMIO Stale Data - * mitigation selection is done. + * As these mitigations are inter-related and rely on VERW instruction + * to clear the microarchitural buffers, update and print their status + * after mitigation selection is done for each of these vulnerabilities. */ md_clear_update_mitigation(); } @@ -592,8 +655,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -602,8 +663,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. 
*/ - ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -746,7 +806,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1288,19 +1348,21 @@ spectre_v2_user_select_mitigation(void) } /* - * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP + * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP * is not required. * - * Enhanced IBRS also protects against cross-thread branch target + * Intel's Enhanced IBRS also protects against cross-thread branch target * injection in user-mode as the IBRS bit remains always set which * implicitly enables cross-thread protections. However, in legacy IBRS * mode, the IBRS bit is set only on kernel entry and cleared on return - * to userspace. This disables the implicit cross-thread protection, - * so allow for STIBP to be selected in that case. + * to userspace. AMD Automatic IBRS also does not protect userspace. + * These modes therefore disable the implicit cross-thread protection, + * so allow for STIBP to be selected in those cases. 
*/ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) return; /* @@ -1330,9 +1392,9 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", - [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", - [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", - [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines", [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; @@ -1401,7 +1463,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. 
Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -1453,20 +1515,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + if (rrsba_disabled) + return; - if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; return; + } - ia32_cap = x86_read_arch_cap_msr(); + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; - if (ia32_cap & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1516,6 +1583,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? 
BHI_MITIGATION_ON : BHI_MITIGATION_OFF; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + + /* Mitigate syscalls when the mitigation is forced =on */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1586,8 +1721,12 @@ static void __init spectre_v2_select_mitigation(void) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); if (spectre_v2_in_ibrs_mode(mode)) { - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - update_spec_ctrl(x86_spec_ctrl_base); + if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) { + msr_set_bit(MSR_EFER, _EFER_AUTOIBRS); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + update_spec_ctrl(x86_spec_ctrl_base); + } } switch (mode) { @@ -1622,6 +1761,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", 
spectre_v2_strings[mode]); @@ -1671,8 +1813,8 @@ static void __init spectre_v2_select_mitigation(void) /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS * and Enhanced IBRS protect firmware too, so enable IBRS around - * firmware calls only when IBRS / Enhanced IBRS aren't otherwise - * enabled. + * firmware calls only when IBRS / Enhanced / Automatic IBRS aren't + * otherwise enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if @@ -1736,8 +1878,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1752,7 +1892,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2502,74 +2642,74 @@ static const char * const l1tf_vmx_states[] = { static ssize_t l1tf_show_state(char *buf) { if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && sched_smt_active())) { - return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation]); + return sysfs_emit(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); } - return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, - l1tf_vmx_states[l1tf_vmx_mitigation], - sched_smt_active() ? 
"vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t itlb_multihit_show_state(char *buf) { if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || !boot_cpu_has(X86_FEATURE_VMX)) - return sprintf(buf, "KVM: Mitigation: VMX unsupported\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX unsupported\n"); else if (!(cr4_read_shadow() & X86_CR4_VMXE)) - return sprintf(buf, "KVM: Mitigation: VMX disabled\n"); + return sysfs_emit(buf, "KVM: Mitigation: VMX disabled\n"); else if (itlb_multihit_kvm_mitigation) - return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + return sysfs_emit(buf, "KVM: Mitigation: Split huge pages\n"); else - return sprintf(buf, "KVM: Vulnerable\n"); + return sysfs_emit(buf, "KVM: Vulnerable\n"); } #else static ssize_t l1tf_show_state(char *buf) { - return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG); } static ssize_t itlb_multihit_show_state(char *buf) { - return sprintf(buf, "Processor vulnerable\n"); + return sysfs_emit(buf, "Processor vulnerable\n"); } #endif static ssize_t mds_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - mds_strings[mds_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); } if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : - sched_smt_active() ? "mitigated" : "disabled")); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); } - return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - sched_smt_active() ? 
"vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t tsx_async_abort_show_state(char *buf) { if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || (taa_mitigation == TAA_MITIGATION_OFF)) - return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s\n", taa_strings[taa_mitigation]); if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - return sprintf(buf, "%s; SMT Host state unknown\n", - taa_strings[taa_mitigation]); + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); } - return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], - sched_smt_active() ? "vulnerable" : "disabled"); + return sysfs_emit(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); } static ssize_t mmio_stale_data_show_state(char *buf) @@ -2589,22 +2729,28 @@ static ssize_t mmio_stale_data_show_state(char *buf) sched_smt_active() ? 
"vulnerable" : "disabled"); } +static ssize_t rfds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); +} + static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS)) return ""; switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2613,10 +2759,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2626,58 +2772,77 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } +static const char *spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + 
!boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) + return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; + + return "; BHI: Vulnerable"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) - return sprintf(buf, "Vulnerable: LFENCE\n"); + return sysfs_emit(buf, "Vulnerable: LFENCE\n"); if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) - return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); if (sched_smt_active() && unprivileged_ebpf_enabled() && spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) - return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sprintf(buf, "%s%s%s%s%s%s%s\n", - spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - pbrsb_eibrs_state(), - spectre_v2_module_string()); + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? 
"; RSB filling" : "", + pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ + spectre_v2_module_string()); } static ssize_t srbds_show_state(char *buf) { - return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + return sysfs_emit(buf, "%s\n", srbds_strings[srbds_mitigation]); } static ssize_t retbleed_show_state(char *buf) { if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET || retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sysfs_emit(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); - return sprintf(buf, "%s; SMT %s\n", - retbleed_strings[retbleed_mitigation], - !sched_smt_active() ? "disabled" : - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? - "enabled with STIBP protection" : "vulnerable"); + return sysfs_emit(buf, "%s; SMT %s\n", retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? 
+ "enabled with STIBP protection" : "vulnerable"); } - return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } static ssize_t gds_show_state(char *buf) @@ -2699,26 +2864,26 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) - return sprintf(buf, "Not affected\n"); + return sysfs_emit(buf, "Not affected\n"); switch (bug) { case X86_BUG_CPU_MELTDOWN: if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + return sysfs_emit(buf, "Mitigation: PTI\n"); if (hypervisor_is_type(X86_HYPER_XEN_PV)) - return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); + return sysfs_emit(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); break; case X86_BUG_SPECTRE_V1: - return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); + return sysfs_emit(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: - return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + return sysfs_emit(buf, "%s\n", ssb_strings[ssb_mode]); case X86_BUG_L1TF: if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) @@ -2750,11 +2915,14 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRSO: return srso_show_state(buf); + case X86_BUG_RFDS: + return rfds_show_state(buf); + default: break; } - return sprintf(buf, "Vulnerable\n"); + return sysfs_emit(buf, "Vulnerable\n"); } ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -2824,4 +2992,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut { return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); } + +ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf) +{ + return 
cpu_show_common(dev, attr, buf, X86_BUG_RFDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 01c4f8f45b..809e12f130 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1036,6 +1036,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1096,18 +1097,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + 
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1138,6 +1139,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SRSO BIT(5) /* CPU is affected by GDS */ #define GDS BIT(6) +/* CPU is affected by Register File Data Sampling */ +#define RFDS BIT(7) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1165,9 +1168,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_N, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + 
VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), @@ -1186,28 +1198,46 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); +} + +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) +{ + /* The "immunity" bit trumps everything else: */ + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) + return false; + + /* + * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to + * indicate that mitigation is needed because guest is running on a + * vulnerable hardware or may migrate to such hardware: + */ + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) + return true; + + /* Only consult the blacklist when there is no enumeration: */ + return cpu_matches(cpu_vuln_blacklist, RFDS); } static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if 
(cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1219,15 +1249,23 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - if (ia32_cap & ARCH_CAP_IBRS_ALL) + /* + * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature + * flag and protect from vendor-specific bugs via the whitelist. + */ + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1246,9 +1284,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1274,7 +1312,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. 
*/ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1282,15 +1320,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } - if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && - !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) - setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); - if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) setup_force_cpu_bug(X86_BUG_SMT_RSB); @@ -1300,7 +1333,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); @@ -1309,11 +1342,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } + if (vulnerable_to_rfds(x86_arch_cap_msr)) + setup_force_cpu_bug(X86_BUG_RFDS); + + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! 
*/ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index defda61f37..2161676577 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index eb48729e22..30d822c2e5 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2382,12 +2382,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 06bfef1c41..0f52110878 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ 
b/arch/x86/kernel/cpu/scattered.c @@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 052ea7425c..893f040b97 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -463,50 +463,26 @@ static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_emulate_call); -static nokprobe_inline -void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond) +static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) { unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; - if (cond) - ip += p->ainsn.rel32; + ip += p->ainsn.rel32; int3_emulate_jmp(regs, ip); } - -static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) -{ - __kprobe_emulate_jmp(p, regs, true); -} NOKPROBE_SYMBOL(kprobe_emulate_jmp); -static const unsigned long jcc_mask[6] = { - [0] = X86_EFLAGS_OF, - [1] = X86_EFLAGS_CF, - [2] = X86_EFLAGS_ZF, - [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, - [4] = X86_EFLAGS_SF, - [5] = X86_EFLAGS_PF, -}; - static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) { - bool invert = p->ainsn.jcc.type & 1; - bool match; + unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; - if (p->ainsn.jcc.type < 0xc) { - match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1]; - } else { - match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ - ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); - if (p->ainsn.jcc.type >= 0xe) - match = match || (regs->flags & X86_EFLAGS_ZF); - 
} - __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); + int3_emulate_jcc(regs, p->ainsn.jcc.type, ip, p->ainsn.rel32); } NOKPROBE_SYMBOL(kprobe_emulate_jcc); static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) { + unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; bool match; if (p->ainsn.loop.type != 3) { /* LOOP* */ @@ -534,7 +510,9 @@ static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) else if (p->ainsn.loop.type == 1) /* LOOPE */ match = match && (regs->flags & X86_EFLAGS_ZF); - __kprobe_emulate_jmp(p, regs, match); + if (match) + ip += p->ainsn.rel32; + int3_emulate_jmp(regs, ip); } NOKPROBE_SYMBOL(kprobe_emulate_loop); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4bce802d25..b892fe7035 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -519,9 +519,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi) write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b8fe38cd12..d69d9d81d2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -137,7 +137,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, log_lvl, d3, d6, d7); } - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + if (cr4 & X86_CR4_PKE) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index e25050c7ff..0e0221271e 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -9,6 +9,7 @@ enum insn_type { NOP = 1, /* site cond-call */ JMP = 2, /* tramp / site tail-call */ RET = 3, /* tramp / site cond-tail-call */ + JCC = 4, }; /* @@ -25,12 +26,40 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 
0xcc, 0xcc, 0xcc, 0xcc }; +static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */ +{ + u8 ret = 0; + + if (insn[0] == 0x0f) { + u8 tmp = insn[1]; + if ((tmp & 0xf0) == 0x80) + ret = tmp; + } + + return ret; +} + +extern void __static_call_return(void); + +asm (".global __static_call_return\n\t" + ".type __static_call_return, @function\n\t" + ASM_FUNC_ALIGN "\n\t" + "__static_call_return:\n\t" + ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + "ret; int3\n\t" + ".size __static_call_return, . - __static_call_return \n\t"); + static void __ref __static_call_transform(void *insn, enum insn_type type, void *func, bool modinit) { const void *emulate = NULL; int size = CALL_INSN_SIZE; const void *code; + u8 op, buf[6]; + + if ((type == JMP || type == RET) && (op = __is_Jcc(insn))) + type = JCC; switch (type) { case CALL: @@ -56,6 +85,20 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, else code = &retinsn; break; + + case JCC: + if (!func) { + func = __static_call_return; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + func = x86_return_thunk; + } + + buf[0] = 0x0f; + __text_gen_insn(buf+1, op, insn+1, func, 5); + code = buf; + size = 6; + + break; } if (memcmp(insn, code, size) == 0) @@ -67,13 +110,14 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, text_poke_bp(insn, code, size, emulate); } -static void __static_call_validate(void *insn, bool tail) +static void __static_call_validate(u8 *insn, bool tail) { - u8 opcode = *(u8 *)insn; + u8 opcode = insn[0]; if (tail) { if (opcode == JMP32_INSN_OPCODE || - opcode == RET_INSN_OPCODE) + opcode == RET_INSN_OPCODE || + __is_Jcc(insn)) return; } else { if (opcode == CALL_INSN_OPCODE || diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9452dc9664..7a1e3f53be 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -192,11 +192,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) cur->warned = false; /* - * If a non-zero TSC value for socket 
0 may be valid then the default - * adjusted value cannot assumed to be zero either. + * The default adjust value cannot be assumed to be zero on any socket. */ - if (tsc_async_resets) - cur->adjusted = bootval; + cur->adjusted = bootval; /* * Check whether this CPU is the first in a package to come up. In diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b939b94d93..786584a990 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -189,6 +189,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_update_pv_runtime(vcpu); + vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); @@ -355,9 +356,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) } static __always_inline -void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) +void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_mask for non-scattered leafs. */ + /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */ BUILD_BUG_ON(leaf < NCAPINTS); kvm_cpu_caps[leaf] = mask; @@ -367,7 +368,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_init_scattered for scattered leafs. */ + /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. 
*/ BUILD_BUG_ON(leaf >= NCAPINTS); kvm_cpu_caps[leaf] &= mask; @@ -469,11 +470,16 @@ void kvm_set_cpu_caps(void) F(AVX_VNNI) | F(AVX512_BF16) ); + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, + F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) | + F(BHI_CTRL) | F(MCDT_NO) + ); + kvm_cpu_cap_mask(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) ); - kvm_cpu_cap_init_scattered(CPUID_12_EAX, + kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, SF(SGX1) | SF(SGX2) ); @@ -710,13 +716,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; /* function 7 has additional index. */ case 7: - entry->eax = min(entry->eax, 1u); + max_idx = entry->eax = min(entry->eax, 2u); cpuid_entry_override(entry, CPUID_7_0_EBX); cpuid_entry_override(entry, CPUID_7_ECX); cpuid_entry_override(entry, CPUID_7_EDX); - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */ - if (entry->eax == 1) { + /* KVM only supports up to 0x7.2, capped above via min(). */ + if (max_idx >= 1) { entry = do_host_cpuid(array, function, 1); if (!entry) goto out; @@ -726,6 +732,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ecx = 0; entry->edx = 0; } + if (max_idx >= 2) { + entry = do_host_cpuid(array, function, 2); + if (!entry) + goto out; + + cpuid_entry_override(entry, CPUID_7_2_EDX); + entry->ecx = 0; + entry->ebx = 0; + entry->eax = 0; + } break; case 0xa: { /* Architectural Performance Monitoring */ struct x86_pmu_capability cap; @@ -918,9 +934,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = 0; break; case 0x80000008: { - unsigned g_phys_as = (entry->eax >> 16) & 0xff; - unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); - unsigned phys_as = entry->eax & 0xff; + unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U); + unsigned int phys_as; /* * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as @@ -928,16 +943,16 
@@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * reductions in MAXPHYADDR for memory encryption affect shadow * paging, too. * - * If TDP is enabled but an explicit guest MAXPHYADDR is not - * provided, use the raw bare metal MAXPHYADDR as reductions to - * the HPAs do not affect GPAs. + * If TDP is enabled, use the raw bare metal MAXPHYADDR as + * reductions to the HPAs do not affect GPAs. */ - if (!tdp_enabled) - g_phys_as = boot_cpu_data.x86_phys_bits; - else if (!g_phys_as) - g_phys_as = phys_as; + if (!tdp_enabled) { + phys_as = boot_cpu_data.x86_phys_bits; + } else { + phys_as = entry->eax & 0xff; + } - entry->eax = g_phys_as | (virt_as << 8); + entry->eax = phys_as | (virt_as << 8); entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0008_EBX); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c99edfff7f..3c0b2dddc9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -121,6 +121,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx); } +static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.is_amd_compatible; +} + +static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) +{ + return !guest_cpuid_is_amd_compatible(vcpu); +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b5b20078a4..8d0eaad7a7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2419,7 +2419,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); - if (r && lvt_type == APIC_LVTPC) + if (r && lvt_type == APIC_LVTPC && + guest_cpuid_is_intel_compatible(apic->vcpu)) kvm_lapic_set_reg(apic, APIC_LVTPC, reg | 
APIC_LVT_MASKED); return r; } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4724289c8a..acb9193fc0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4351,7 +4351,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, context->root_level, is_efer_nx(context), guest_can_use_gbpages(vcpu), is_cr4_pse(context), - guest_cpuid_is_amd_or_hygon(vcpu)); + guest_cpuid_is_amd_compatible(vcpu)); } static void diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 7eeade35a4..e43909d650 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -7,23 +7,44 @@ #include /* - * Hardware-defined CPUID leafs that are scattered in the kernel, but need to - * be directly used by KVM. Note, these word values conflict with the kernel's - * "bug" caps, but KVM doesn't use those. + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_7_2_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; +/* + * Define a KVM-only feature flag. + * + * For features that are scattered by cpufeatures.h, __feature_translate() also + * needs to be updated to translate the kernel-defined feature into the + * KVM-defined feature. + * + * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h, + * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so + * that X86_FEATURE_* can be used in KVM. No __feature_translate() handling is + * needed in this case. + */ #define KVM_X86_FEATURE(w, f) ((w)*32 + (f)) /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). 
*/ #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ +#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0) +#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) +#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) + struct cpuid_reg { u32 function; u32 index; @@ -49,6 +70,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, }; /* @@ -61,10 +83,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } @@ -75,12 +99,17 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) */ static __always_inline u32 __feature_translate(int x86_feature) { - if (x86_feature == X86_FEATURE_SGX1) - return KVM_X86_FEATURE_SGX1; - else if (x86_feature == X86_FEATURE_SGX2) - return KVM_X86_FEATURE_SGX2; - - return x86_feature; +#define KVM_X86_TRANSLATE_FEATURE(f) \ + case X86_FEATURE_##f: return KVM_X86_FEATURE_##f + + switch (x86_feature) { + KVM_X86_TRANSLATE_FEATURE(SGX1); + KVM_X86_TRANSLATE_FEATURE(SGX2); + KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + default: 
+ return x86_feature; + } } static __always_inline u32 __feature_leaf(int x86_feature) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 93d73b55ae..0f3d29f83c 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1662,20 +1662,22 @@ int svm_register_enc_region(struct kvm *kvm, goto e_free; } - region->uaddr = range->addr; - region->size = range->size; - - list_add_tail(®ion->list, &sev->regions_list); - mutex_unlock(&kvm->lock); - /* * The guest may change the memory encryption attribute from C=0 -> C=1 * or vice versa for this memory range. Lets make sure caches are * flushed to ensure that guest data gets written into memory with - * correct C-bit. + * correct C-bit. Note, this must be done before dropping kvm->lock, + * as region and its array of pages can be freed by a different task + * once kvm->lock is released. */ sev_clflush_pages(region->pages, region->npages); + region->uaddr = range->addr; + region->size = range->size; + + list_add_tail(®ion->list, &sev->regions_list); + mutex_unlock(&kvm->lock); + return ret; e_free: diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index edc3f16cc1..6a9bfdfbb6 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -2,7 +2,10 @@ #ifndef __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H -#define VMX_RUN_VMRESUME (1 << 0) -#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) +#define VMX_RUN_VMRESUME_SHIFT 0 +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 + +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 982138bebb..ef61bd6d07 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -77,7 +77,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb $VMX_RUN_VMRESUME, %bl + bt 
$VMX_RUN_VMRESUME_SHIFT, %bx /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -99,8 +99,11 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Check EFLAGS.ZF from 'testb' above */ - jz .Lvmlaunch + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" @@ -210,6 +213,8 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY_VMEXIT + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 89744ee061..bedbd077e5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -398,7 +398,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + vmx_fb_clear_ctrl_available; /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -6747,11 +6748,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, { kvm_guest_enter_irqoff(); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. 
+ */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index aa6f700f8c..7bfc037022 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1498,7 +1498,8 @@ static unsigned int num_msr_based_features; ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { @@ -1535,6 +1536,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_SSB_NO; if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + data |= ARCH_CAP_RFDS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* @@ -3104,7 +3107,7 @@ static void kvmclock_sync_fn(struct work_struct *work) static bool can_set_mci_status(struct kvm_vcpu *vcpu) { /* McStatusWrEn enabled? */ - if (guest_cpuid_is_amd_or_hygon(vcpu)) + if (guest_cpuid_is_amd_compatible(vcpu)) return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); return false; @@ -7105,7 +7108,17 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, } if (r < 0) - goto emul_write; + return X86EMUL_UNHANDLEABLE; + + /* + * Mark the page dirty _before_ checking whether or not the CMPXCHG was + * successful, as the old value is written back on failure. 
Note, for + * live migration, this is unnecessarily conservative as CMPXCHG writes + * back the original value and the access is atomic, but KVM's ABI is + * that all writes are dirty logged, regardless of the value written. + */ + kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa)); + if (r) return X86EMUL_CMPXCHG_FAILED; @@ -9121,13 +9134,20 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu) static void kvm_inject_exception(struct kvm_vcpu *vcpu) { + /* + * Suppress the error code if the vCPU is in Real Mode, as Real Mode + * exceptions don't report error codes. The presence of an error code + * is carried with the exception and only stripped when the exception + * is injected as intercepted #PF VM-Exits for AMD's Paged Real Mode do + * report an error code despite the CPU being in Real Mode. + */ + vcpu->arch.exception.has_error_code &= is_protmode(vcpu); + trace_kvm_inj_exception(vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, vcpu->arch.exception.injected); - if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) - vcpu->arch.exception.error_code = false; static_call(kvm_x86_queue_exception)(vcpu); } diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 6f5321b36d..019096b66e 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -108,6 +108,7 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ret int3 SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) #endif SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) @@ -249,9 +250,7 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ - "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ - "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + ALTERNATIVE "jmp retbleed_untrain_ret", "jmp srso_untrain_ret", X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) diff --git 
a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index ec31f5b603..1c25c1072a 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -148,7 +148,7 @@ AVXcode: 65: SEG=GS (Prefix) 66: Operand-Size (Prefix) 67: Address-Size (Prefix) -68: PUSH Iz (d64) +68: PUSH Iz 69: IMUL Gv,Ev,Iz 6a: PUSH Ib (d64) 6b: IMUL Gv,Ev,Ib diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index abc6fbc3d5..31afd82b95 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -716,39 +716,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, WARN_ON_ONCE(user_mode(regs)); /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { - /* - * Any interrupt that takes a fault gets the fixup. This makes - * the below recursive fault logic only apply to a faults from - * task context. - */ - if (in_interrupt()) - return; - - /* - * Per the above we're !in_interrupt(), aka. task context. - * - * In this case we need to make sure we're not recursively - * faulting through the emulate_vsyscall() logic. - */ - if (current->thread.sig_on_uaccess_err && signal) { - sanitize_error_code(address, &error_code); - - set_signal_archinfo(address, error_code); - - if (si_code == SEGV_PKUERR) { - force_sig_pkuerr((void __user *)address, pkey); - } else { - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); - } - } - - /* - * Barring that, we can do the fixup and be happy. 
- */ + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; - } /* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index f50cc210a9..968d7005f4 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_large(*pud)) - continue; - - /* Is using a gbpage allowed? */ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index d5ef64ddd3..f646602295 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -989,6 +989,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. 
+ * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -999,20 +1031,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. 
*/ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1087,7 +1112,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1095,11 +1119,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 7a7701d1e1..59373a4abf 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -41,7 +41,8 @@ KCOV_INSTRUMENT := n # make up the standalone purgatory.ro PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel -PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0 +PURGATORY_CFLAGS := -mcmodel=small -ffreestanding -fno-zero-initialized-in-bss -g0 +PURGATORY_CFLAGS += -fpic -fvisibility=hidden PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING PURGATORY_CFLAGS += -fno-stack-protector diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 3167228ca1..d7549953bb 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -692,6 +692,15 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } + + /* + * Do not perform relocations in .notes sections; any + * values there are meant for pre-boot consumption (e.g. + * startup_xen). 
+ */ + if (sec_applies->shdr.sh_type == SHT_NOTE) + continue; + sh_symtab = sec_symtab->symtab; sym_strtab = sec_symtab->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 645a589edd..bfdb7b0cf4 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1336,7 +1336,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); - u64 tdelta, delay, new_delay; + u64 tdelta, delay, new_delay, shift; s64 vover, vover_pct; u32 hwa; @@ -1351,8 +1351,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; - if (iocg->delay) - delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC); + shift = div64_u64(tdelta, USEC_PER_SEC); + if (iocg->delay && shift < BITS_PER_LONG) + delay = iocg->delay >> shift; else delay = 0; diff --git a/block/blk-settings.c b/block/blk-settings.c index 959b5c1e6d..1b92e66249 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -647,6 +647,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); t->zoned = max(t->zoned, b->zoned); + if (!t->zoned) { + t->zone_write_granularity = 0; + t->max_zone_append_sectors = 0; + } return ret; } EXPORT_SYMBOL(blk_stack_limits); diff --git a/block/blk-stat.c b/block/blk-stat.c index ae3dd1fb8e..6e602f9b96 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -28,7 +28,7 @@ void blk_rq_stat_init(struct blk_rq_stat *stat) /* src is a per-cpu stat, mean isn't initialized */ void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { - if (!src->nr_samples) + if (dst->nr_samples + src->nr_samples <= dst->nr_samples) return; dst->min = min(dst->min, src->min); diff --git a/crypto/algapi.c b/crypto/algapi.c 
index 565f591c4b..c390a79c5a 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -258,7 +258,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) } if (!strcmp(q->cra_driver_name, alg->cra_name) || - !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } diff --git a/crypto/algboss.c b/crypto/algboss.c index 1814d2c518..eb5fe84efb 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -67,7 +67,7 @@ static int cryptomgr_probe(void *data) complete_all(¶m->larval->completion); crypto_alg_put(¶m->larval->alg); kfree(param); - module_put_and_exit(0); + module_put_and_kthread_exit(0); } static int cryptomgr_schedule_probe(struct crypto_larval *larval) @@ -190,7 +190,7 @@ static int cryptomgr_test(void *data) crypto_alg_tested(param->driver, err); kfree(param); - module_put_and_exit(0); + module_put_and_kthread_exit(0); } static int cryptomgr_schedule_test(struct crypto_alg *alg) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 1e7b15009b..da10517f29 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -373,4 +373,7 @@ module_exit(ecdsa_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefan Berger "); MODULE_DESCRIPTION("ECDSA generic algorithm"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p192"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p256"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p384"); MODULE_ALIAS_CRYPTO("ecdsa-generic"); diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index f7ed430206..0a970261b1 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -294,4 +294,5 @@ module_exit(ecrdsa_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vitaly Chikunov "); MODULE_DESCRIPTION("EC-RDSA generic algorithm"); +MODULE_ALIAS_CRYPTO("ecrdsa"); MODULE_ALIAS_CRYPTO("ecrdsa-generic"); diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index 7b20165341..3c3e911141 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -573,7 +573,7 @@ static 
u_long get_word(struct vc_data *vc) } attr_ch = get_char(vc, (u_short *)tmp_pos, &spk_attr); buf[cnt++] = attr_ch; - while (tmpx < vc->vc_cols - 1) { + while (tmpx < vc->vc_cols - 1 && cnt < ARRAY_SIZE(buf) - 1) { tmp_pos += 2; tmpx++; ch = get_char(vc, (u_short *)tmp_pos, &temp); diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c index 2b8699673b..1e10f3c956 100644 --- a/drivers/accessibility/speakup/synth.c +++ b/drivers/accessibility/speakup/synth.c @@ -208,8 +208,10 @@ void spk_do_flush(void) wake_up_process(speakup_task); } -void synth_write(const char *buf, size_t count) +void synth_write(const char *_buf, size_t count) { + const unsigned char *buf = (const unsigned char *) _buf; + while (count--) synth_buffer_add(*buf++); synth_start(); diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index f919811156..b6cf9c9bd6 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -5,6 +5,7 @@ ccflags-y := -D_LINUX -DBUILDING_ACPICA ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT +CFLAGS_tbfind.o += $(call cc-disable-warning, stringop-truncation) # use acpi.o to put all files here into acpi.o modparam namespace obj-y += acpi.o diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c index b91155ea9c..c9131259f7 100644 --- a/drivers/acpi/acpica/dbnames.c +++ b/drivers/acpi/acpica/dbnames.c @@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, ACPI_FREE(buffer.pointer); buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); - + status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) { + acpi_os_printf("Could Not evaluate object %p\n", + obj_handle); + return (AE_OK); + } /* * Since this is a field unit, surround the output in braces */ diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 7cc9183c8d..6dcce036ad 100644 --- a/drivers/acpi/cppc_acpi.c +++ 
b/drivers/acpi/cppc_acpi.c @@ -161,6 +161,13 @@ show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_freq); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); +/* Check for valid access_width, otherwise, fallback to using bit_width */ +#define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) + +/* Shift and apply the mask for CPC reads/writes */ +#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \ + GENMASK(((reg)->bit_width) - 1, 0)) + static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -762,8 +769,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { if (gas_t->address) { void __iomem *addr; + size_t access_width; - addr = ioremap(gas_t->address, gas_t->bit_width/8); + access_width = GET_BIT_WIDTH(gas_t) / 8; + addr = ioremap(gas_t->address, access_width); if (!addr) goto out_free; cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr; @@ -936,6 +945,7 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) { int ret_val = 0; void __iomem *vaddr = NULL; + int size; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; @@ -945,17 +955,26 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + size = GET_BIT_WIDTH(reg); + + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. 
+ */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_read_ffh(cpu, reg, val); else return acpi_os_read_memory((acpi_physical_address)reg->address, - val, reg->bit_width); + val, size); - switch (reg->bit_width) { + switch (size) { case 8: *val = readb_relaxed(vaddr); break; @@ -969,32 +988,53 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = readq_relaxed(vaddr); break; default: - pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot read %u bit width from system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", + size, pcc_ss_id); + } ret_val = -EFAULT; } + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + *val = MASK_VAL(reg, *val); + return ret_val; } static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) { int ret_val = 0; + int size; void __iomem *vaddr = NULL; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + size = GET_BIT_WIDTH(reg); + + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. 
+ */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_write_ffh(cpu, reg, val); else return acpi_os_write_memory((acpi_physical_address)reg->address, - val, reg->bit_width); + val, size); + + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + val = MASK_VAL(reg, val); - switch (reg->bit_width) { + switch (size) { case 8: writeb_relaxed(val, vaddr); break; @@ -1008,8 +1048,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) writeq_relaxed(val, vaddr); break; default: - pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot write %u bit width to system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", + size, pcc_ss_id); + } ret_val = -EFAULT; break; } diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 0f533aff23..da4b94c0d6 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -579,6 +579,18 @@ static const struct dmi_system_id lg_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "X577"), }, }, + { + /* TongFang GXxHRXx/TUXEDO InfinityBook Pro Gen9 AMD */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GXxHRXx"), + }, + }, + { + /* TongFang GMxHGxx/TUXEDO Stellaris Slim Gen1 AMD */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxHGxx"), + }, + }, { } }; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b277e25b27..95deb55fb9 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -382,18 +382,6 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, - /* - * ASUS B1400CEAE hangs on resume from suspend (see - * 
https://bugzilla.kernel.org/show_bug.cgi?id=215742). - */ - { - .callback = init_default_s3, - .ident = "ASUS B1400CEAE", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"), - }, - }, {}, }; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 7547c4ed26..3abd5619a9 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1678,8 +1678,10 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr)) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; + if (u) { if (copy_from_user(object, u + offset, read_size)) return 0; @@ -5163,7 +5165,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto err; break; case BINDER_SET_MAX_THREADS: { - int max_threads; + u32 max_threads; if (copy_from_user(&max_threads, ubuf, sizeof(max_threads))) { diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 1ade9799c8..da9ead1cff 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -420,7 +420,7 @@ struct binder_proc { struct list_head todo; struct binder_stats stats; struct list_head delivered_death; - int max_threads; + u32 max_threads; int requested_threads; int requested_threads_started; int tmp_ref; diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index b0a8aac008..7807c7b9f3 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -666,11 +666,6 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { - dev_info(&pdev->dev, "ASM1166 has only six ports\n"); - 
hpriv->saved_port_map = 0x3f; - } - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->force_port_map = 1; diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 03c580625c..55b462ce99 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -173,8 +173,6 @@ static int legacy_port[NR_HOST] = { 0x1f0, 0x170, 0x1e8, 0x168, 0x1e0, 0x160 }; static struct legacy_probe probe_list[NR_HOST]; static struct legacy_data legacy_data[NR_HOST]; static struct ata_host *legacy_host[NR_HOST]; -static int nr_legacy_host; - /** * legacy_probe_add - Add interface to probe list @@ -1276,9 +1274,11 @@ static __exit void legacy_exit(void) { int i; - for (i = 0; i < nr_legacy_host; i++) { + for (i = 0; i < NR_HOST; i++) { struct legacy_data *ld = &legacy_data[i]; - ata_host_detach(legacy_host[i]); + + if (legacy_host[i]) + ata_host_detach(legacy_host[i]); platform_device_unregister(ld->platform_dev); } } diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index 6fd54e968d..1564472fd5 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -201,7 +201,10 @@ int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge) pclk = sg->sata0_pclk; else pclk = sg->sata1_pclk; - clk_enable(pclk); + ret = clk_enable(pclk); + if (ret) + return ret; + msleep(10); /* Do not keep clocking a bridge that is not online */ diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index c53633d47b..1c9dde51d4 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = { }, }; -static const struct pci_device_id mv_pci_tbl[] = { - { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, - { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, - /* RocketRAID 1720/174x have different identifiers */ - { 
PCI_VDEVICE(TTI, 0x1720), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, - - { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, - { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, - { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, - - { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, - - /* Adaptec 1430SA */ - { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, - - /* Marvell 7042 support */ - { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, - - /* Highpoint RocketRAID PCIe series */ - { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, - { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, - - { } /* terminate list */ -}; - static const struct mv_hw_ops mv5xxx_ops = { .phy_errata = mv5_phy_errata, .enable_leds = mv5_enable_leds, @@ -4311,6 +4280,36 @@ static int mv_pci_init_one(struct pci_dev *pdev, static int mv_pci_device_resume(struct pci_dev *pdev); #endif +static const struct pci_device_id mv_pci_tbl[] = { + { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, + { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, + /* RocketRAID 1720/174x have different identifiers */ + { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, + + { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, + { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, + { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, + + { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, + + /* Adaptec 1430SA */ + { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, + + /* Marvell 7042 support */ + { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, + + /* Highpoint RocketRAID PCIe series */ + { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, + { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, + + { } /* terminate list */ +}; static 
struct pci_driver mv_pci_driver = { .name = DRV_NAME, @@ -4323,6 +4322,7 @@ static struct pci_driver mv_pci_driver = { #endif }; +MODULE_DEVICE_TABLE(pci, mv_pci_tbl); /** * mv_print_info - Dump key info to kernel log for perusal. @@ -4495,7 +4495,6 @@ static void __exit mv_exit(void) MODULE_AUTHOR("Brett Russ"); MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers"); MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, mv_pci_tbl); MODULE_VERSION(DRV_VERSION); MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 4c01190a5e..c95685f693 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -1004,8 +1004,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_fromio(psource, dimm_mmio + offset / 4, dist); psource += dist; @@ -1053,8 +1052,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, readl(mmio + PDC_DIMM_WINDOW_CTLR); offset -= (idx * window_size); idx++; - dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_toio(dimm_mmio + offset / 4, psource, dist); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); diff --git a/drivers/base/core.c b/drivers/base/core.c index adf003a7e8..ca56fd7450 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -53,6 +53,7 @@ static unsigned int defer_sync_state_count = 1; static DEFINE_MUTEX(fwnode_link_lock); static bool fw_devlink_is_permissive(void); static bool fw_devlink_drv_reg_done; +static struct workqueue_struct *device_link_wq; /** * fwnode_link_add - Create a link between two fwnode_handles. 
@@ -501,12 +502,26 @@ static void devlink_dev_release(struct device *dev) /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or - * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * supplier devices get deleted when it runs, so put it into the + * dedicated workqueue. */ - queue_work(system_long_wq, &link->rm_work); + queue_work(device_link_wq, &link->rm_work); } +/** + * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate + */ +void device_link_wait_removal(void) +{ + /* + * devlink removal jobs are queued in the dedicated work queue. + * To be sure that all removal jobs are terminated, ensure that any + * scheduled work has run to completion. + */ + flush_workqueue(device_link_wq); +} +EXPORT_SYMBOL_GPL(device_link_wait_removal); + static struct class devlink_class = { .name = "devlink", .owner = THIS_MODULE, @@ -3825,9 +3840,14 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; + device_link_wq = alloc_workqueue("device_link_wq", 0, 0); + if (!device_link_wq) + goto wq_err; return 0; + wq_err: + kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 46430cf240..93222cf391 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -589,6 +589,12 @@ ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -602,6 +608,7 @@ 
static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -617,6 +624,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_retbleed.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_spec_rstack_overflow.attr, + &dev_attr_reg_file_data_sampling.attr, NULL }; diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 6f2cdd8643..ab6eced7f5 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -362,8 +362,10 @@ void dev_pm_enable_wake_irq_complete(struct device *dev) return; if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && - wirq->status & WAKE_IRQ_DEDICATED_REVERSE) + wirq->status & WAKE_IRQ_DEDICATED_REVERSE) { enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 87791265e0..ec78d9ad3e 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2045,10 +2045,13 @@ static void __exit null_exit(void) if (g_queue_mode == NULL_Q_MQ && shared_tags) blk_mq_free_tag_set(&tag_set); + + mutex_destroy(&lock); } module_init(null_init); module_exit(null_exit); MODULE_AUTHOR("Jens Axboe "); +MODULE_DESCRIPTION("multi queue aware block test driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 2a4cc5d8c2..0635df202d 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -411,7 +411,7 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver) return PTR_ERR(skb); } - if (skb->len != 
sizeof(*ver)) { + if (!skb || skb->len != sizeof(*ver)) { bt_dev_err(hdev, "Intel version event size mismatch"); kfree_skb(skb); return -EILSEQ; diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 0f3943ac54..d4ae33a5f8 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -182,9 +182,10 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) } EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); -static void qca_tlv_check_data(struct hci_dev *hdev, +static int qca_tlv_check_data(struct hci_dev *hdev, struct qca_fw_config *config, - u8 *fw_data, enum qca_btsoc_type soc_type) + u8 *fw_data, size_t fw_size, + enum qca_btsoc_type soc_type) { const u8 *data; u32 type_len; @@ -194,12 +195,16 @@ static void qca_tlv_check_data(struct hci_dev *hdev, struct tlv_type_patch *tlv_patch; struct tlv_type_nvm *tlv_nvm; uint8_t nvm_baud_rate = config->user_baud_rate; + u8 type; config->dnld_mode = QCA_SKIP_EVT_NONE; config->dnld_type = QCA_SKIP_EVT_NONE; switch (config->type) { case ELF_TYPE_PATCH: + if (fw_size < 7) + return -EINVAL; + config->dnld_mode = QCA_SKIP_EVT_VSE_CC; config->dnld_type = QCA_SKIP_EVT_VSE_CC; @@ -208,6 +213,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, bt_dev_dbg(hdev, "File version : 0x%x", fw_data[6]); break; case TLV_TYPE_PATCH: + if (fw_size < sizeof(struct tlv_type_hdr) + sizeof(struct tlv_type_patch)) + return -EINVAL; + tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); tlv_patch = (struct tlv_type_patch *)tlv->data; @@ -247,25 +255,56 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; case TLV_TYPE_NVM: + if (fw_size < sizeof(struct tlv_type_hdr)) + return -EINVAL; + tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); - length = (type_len >> 8) & 0x00ffffff; + length = type_len >> 8; + type = type_len & 0xff; - BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff); + /* Some NVM files have more than one set of tags, only parse + * the first 
set when it has type 2 for now. When there is + * more than one set there is an enclosing header of type 4. + */ + if (type == 4) { + if (fw_size < 2 * sizeof(struct tlv_type_hdr)) + return -EINVAL; + + tlv++; + + type_len = le32_to_cpu(tlv->type_len); + length = type_len >> 8; + type = type_len & 0xff; + } + + BT_DBG("TLV Type\t\t : 0x%x", type); BT_DBG("Length\t\t : %d bytes", length); + if (type != 2) + break; + + if (fw_size < length + (tlv->data - fw_data)) + return -EINVAL; + idx = 0; data = tlv->data; - while (idx < length) { + while (idx < length - sizeof(struct tlv_type_nvm)) { tlv_nvm = (struct tlv_type_nvm *)(data + idx); tag_id = le16_to_cpu(tlv_nvm->tag_id); tag_len = le16_to_cpu(tlv_nvm->tag_len); + if (length < idx + sizeof(struct tlv_type_nvm) + tag_len) + return -EINVAL; + /* Update NVM tags as needed */ switch (tag_id) { case EDL_TAG_ID_HCI: + if (tag_len < 3) + return -EINVAL; + /* HCI transport layer parameters * enabling software inband sleep * onto controller side. @@ -281,6 +320,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; case EDL_TAG_ID_DEEP_SLEEP: + if (tag_len < 1) + return -EINVAL; + /* Sleep enable mask * enabling deep sleep feature on controller. 
*/ @@ -289,14 +331,16 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; } - idx += (sizeof(u16) + sizeof(u16) + 8 + tag_len); + idx += sizeof(struct tlv_type_nvm) + tag_len; } break; default: BT_ERR("Unknown TLV type %d", config->type); - break; + return -EINVAL; } + + return 0; } static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size, @@ -446,7 +490,9 @@ static int qca_download_firmware(struct hci_dev *hdev, memcpy(data, fw->data, size); release_firmware(fw); - qca_tlv_check_data(hdev, config, data, soc_type); + ret = qca_tlv_check_data(hdev, config, data, size, soc_type); + if (ret) + goto out; segment = data; remain = size; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a862f859f7..8c3db223d6 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -439,6 +439,8 @@ static const struct usb_device_id blacklist_table[] = { /* Realtek 8852BE Bluetooth devices */ { USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index d5f408851a..fb71caa31d 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1590,6 +1590,9 @@ static bool qca_prevent_wake(struct hci_dev *hdev) struct hci_uart *hu = hci_get_drvdata(hdev); bool wakeup; + if (!hu->serdev) + return true; + /* BT SoC attached through the serial bus is handled by the serdev driver. * So we need to use the device handle of the serdev driver to get the * status of device may wakeup. 
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 38b46c7d17..a97edbf745 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -296,28 +296,35 @@ static int register_device(int minor, struct pp_struct *pp) if (!port) { pr_warn("%s: no associated port!\n", name); rc = -ENXIO; - goto err; + goto err_free_name; + } + + index = ida_alloc(&ida_index, GFP_KERNEL); + if (index < 0) { + pr_warn("%s: failed to get index!\n", name); + rc = index; + goto err_put_port; } - index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL); memset(&ppdev_cb, 0, sizeof(ppdev_cb)); ppdev_cb.irq_func = pp_irq; ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0; ppdev_cb.private = pp; pdev = parport_register_dev_model(port, name, &ppdev_cb, index); - parport_put_port(port); if (!pdev) { pr_warn("%s: failed to register device!\n", name); rc = -ENXIO; - ida_simple_remove(&ida_index, index); - goto err; + ida_free(&ida_index, index); + goto err_put_port; } pp->pdev = pdev; pp->index = index; dev_dbg(&pdev->dev, "registered pardevice\n"); -err: +err_put_port: + parport_put_port(port); +err_free_name: kfree(name); return rc; } @@ -750,7 +757,7 @@ static int pp_release(struct inode *inode, struct file *file) if (pp->pdev) { parport_unregister_device(pp->pdev); - ida_simple_remove(&ida_index, pp->index); + ida_free(&ida_index, pp->index); pp->pdev = NULL; pr_debug(CHRDEV "%x: unregistered pardevice\n", minor); } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 84397af4fb..dc2bcf58fc 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -37,7 +37,11 @@ static HLIST_HEAD(clk_root_list); static HLIST_HEAD(clk_orphan_list); static LIST_HEAD(clk_notifier_list); -static struct hlist_head *all_lists[] = { +/* List of registered clks that use runtime PM */ +static HLIST_HEAD(clk_rpm_list); +static DEFINE_MUTEX(clk_rpm_list_lock); + +static const struct hlist_head *all_lists[] = { &clk_root_list, &clk_orphan_list, NULL, @@ -59,6 +63,7 @@ struct clk_core { struct 
clk_hw *hw; struct module *owner; struct device *dev; + struct hlist_node rpm_node; struct device_node *of_node; struct clk_core *parent; struct clk_parent_map *parents; @@ -129,6 +134,89 @@ static void clk_pm_runtime_put(struct clk_core *core) pm_runtime_put_sync(core->dev); } +/** + * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices + * + * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so + * that disabling unused clks avoids a deadlock where a device is runtime PM + * resuming/suspending and the runtime PM callback is trying to grab the + * prepare_lock for something like clk_prepare_enable() while + * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime + * PM resume/suspend the device as well. + * + * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on + * success. Otherwise the lock is released on failure. + * + * Return: 0 on success, negative errno otherwise. + */ +static int clk_pm_runtime_get_all(void) +{ + int ret; + struct clk_core *core, *failed; + + /* + * Grab the list lock to prevent any new clks from being registered + * or unregistered until clk_pm_runtime_put_all(). + */ + mutex_lock(&clk_rpm_list_lock); + + /* + * Runtime PM "get" all the devices that are needed for the clks + * currently registered. Do this without holding the prepare_lock, to + * avoid the deadlock. 
+ */ + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + ret = clk_pm_runtime_get(core); + if (ret) { + failed = core; + pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n", + dev_name(failed->dev), failed->name); + goto err; + } + } + + return 0; + +err: + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + if (core == failed) + break; + + clk_pm_runtime_put(core); + } + mutex_unlock(&clk_rpm_list_lock); + + return ret; +} + +/** + * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices + * + * Put the runtime PM references taken in clk_pm_runtime_get_all() and release + * the 'clk_rpm_list_lock'. + */ +static void clk_pm_runtime_put_all(void) +{ + struct clk_core *core; + + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) + clk_pm_runtime_put(core); + mutex_unlock(&clk_rpm_list_lock); +} + +static void clk_pm_runtime_init(struct clk_core *core) +{ + struct device *dev = core->dev; + + if (dev && pm_runtime_enabled(dev)) { + core->rpm_enabled = true; + + mutex_lock(&clk_rpm_list_lock); + hlist_add_head(&core->rpm_node, &clk_rpm_list); + mutex_unlock(&clk_rpm_list_lock); + } +} + /*** locking ***/ static void clk_prepare_lock(void) { @@ -1252,9 +1340,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) if (core->flags & CLK_IGNORE_UNUSED) return; - if (clk_pm_runtime_get(core)) - return; - if (clk_core_is_prepared(core)) { trace_clk_unprepare(core); if (core->ops->unprepare_unused) @@ -1263,8 +1348,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) core->ops->unprepare(core->hw); trace_clk_unprepare_complete(core); } - - clk_pm_runtime_put(core); } static void __init clk_disable_unused_subtree(struct clk_core *core) @@ -1280,9 +1363,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_prepare_enable(core->parent); - if (clk_pm_runtime_get(core)) - goto unprepare_out; - flags = clk_enable_lock(); if 
(core->enable_count) @@ -1307,8 +1387,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) unlock_out: clk_enable_unlock(flags); - clk_pm_runtime_put(core); -unprepare_out: if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_disable_unprepare(core->parent); } @@ -1324,12 +1402,22 @@ __setup("clk_ignore_unused", clk_ignore_unused_setup); static int __init clk_disable_unused(void) { struct clk_core *core; + int ret; if (clk_ignore_unused) { pr_warn("clk: Not disabling unused clocks\n"); return 0; } + pr_info("clk: Disabling unused clocks\n"); + + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; + /* + * Grab the prepare lock to keep the clk topology stable while iterating + * over clks. + */ clk_prepare_lock(); hlist_for_each_entry(core, &clk_root_list, child_node) @@ -1346,6 +1434,8 @@ static int __init clk_disable_unused(void) clk_prepare_unlock(); + clk_pm_runtime_put_all(); + return 0; } late_initcall_sync(clk_disable_unused); @@ -3652,9 +3742,6 @@ static int __clk_core_init(struct clk_core *core) } clk_core_reparent_orphans_nolock(); - - - kref_init(&core->ref); out: clk_pm_runtime_put(core); unlock: @@ -3883,6 +3970,22 @@ static void clk_core_free_parent_map(struct clk_core *core) kfree(core->parents); } +/* Free memory allocated for a struct clk_core */ +static void __clk_release(struct kref *ref) +{ + struct clk_core *core = container_of(ref, struct clk_core, ref); + + if (core->rpm_enabled) { + mutex_lock(&clk_rpm_list_lock); + hlist_del(&core->rpm_node); + mutex_unlock(&clk_rpm_list_lock); + } + + clk_core_free_parent_map(core); + kfree_const(core->name); + kfree(core); +} + static struct clk * __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) { @@ -3903,6 +4006,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_out; } + kref_init(&core->ref); + core->name = kstrdup_const(init->name, GFP_KERNEL); if (!core->name) { ret = -ENOMEM; @@ -3915,9 +4020,8 @@ 
__clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) } core->ops = init->ops; - if (dev && pm_runtime_enabled(dev)) - core->rpm_enabled = true; core->dev = dev; + clk_pm_runtime_init(core); core->of_node = np; if (dev && dev->driver) core->owner = dev->driver->owner; @@ -3957,12 +4061,10 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) hw->clk = NULL; fail_create_clk: - clk_core_free_parent_map(core); fail_parents: fail_ops: - kfree_const(core->name); fail_name: - kfree(core); + kref_put(&core->ref, __clk_release); fail_out: return ERR_PTR(ret); } @@ -4042,18 +4144,6 @@ int of_clk_hw_register(struct device_node *node, struct clk_hw *hw) } EXPORT_SYMBOL_GPL(of_clk_hw_register); -/* Free memory allocated for a clock. */ -static void __clk_release(struct kref *ref) -{ - struct clk_core *core = container_of(ref, struct clk_core, ref); - - lockdep_assert_held(&prepare_lock); - - clk_core_free_parent_map(core); - kfree_const(core->name); - kfree(core); -} - /* * Empty clk_ops for unregistered clocks. 
These are used temporarily * after clk_unregister() was called on a clock and until last clock @@ -4106,7 +4196,7 @@ static void clk_core_evict_parent_cache_subtree(struct clk_core *root, /* Remove this clk from all parent caches */ static void clk_core_evict_parent_cache(struct clk_core *core) { - struct hlist_head **lists; + const struct hlist_head **lists; struct clk_core *root; lockdep_assert_held(&prepare_lock); @@ -4137,7 +4227,8 @@ void clk_unregister(struct clk *clk) if (ops == &clk_nodrv_ops) { pr_err("%s: unregistered clock: %s\n", __func__, clk->core->name); - goto unlock; + clk_prepare_unlock(); + return; } /* * Assign empty clock ops for consumers that might still hold @@ -4171,11 +4262,10 @@ void clk_unregister(struct clk *clk) if (clk->core->protect_count) pr_warn("%s: unregistering protected clock: %s\n", __func__, clk->core->name); + clk_prepare_unlock(); kref_put(&clk->core->ref, __clk_release); free_clk(clk); -unlock: - clk_prepare_unlock(); } EXPORT_SYMBOL_GPL(clk_unregister); @@ -4381,13 +4471,11 @@ void __clk_put(struct clk *clk) clk->max_rate < clk->core->req_rate) clk_core_set_rate_nolock(clk->core, clk->core->req_rate); - owner = clk->core->owner; - kref_put(&clk->core->ref, __clk_release); - clk_prepare_unlock(); + owner = clk->core->owner; + kref_put(&clk->core->ref, __clk_release); module_put(owner); - free_clk(clk); } diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c index 4c5c7a8f41..b9844e41cf 100644 --- a/drivers/clk/qcom/gcc-ipq6018.c +++ b/drivers/clk/qcom/gcc-ipq6018.c @@ -1557,6 +1557,7 @@ static struct clk_regmap_div nss_ubi0_div_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(24000000, P_XO, 1, 0, 0), + { } }; static const struct clk_parent_data gcc_xo_gpll0_core_pi_sleep_clk[] = { @@ -1737,6 +1738,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(160000000, P_GPLL0, 5, 0, 0), F(216000000, P_GPLL6, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static 
const struct clk_parent_data gcc_xo_gpll0_gpll6_gpll0_div2[] = { diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index 0393154fea..649e75a41f 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -972,6 +972,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = { static const struct freq_tbl ftbl_pcie_aux_clk_src[] = { F(19200000, P_XO, 1, 0, 0), + { } }; static struct clk_rcg2 pcie0_aux_clk_src = { @@ -1077,6 +1078,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(160000000, P_GPLL0, 5, 0, 0), F(308570000, P_GPLL6, 3.5, 0, 0), + { } }; static struct clk_rcg2 sdcc1_ice_core_clk_src = { diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index 58aa3ec9a7..fffdb48007 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -3642,3 +3642,4 @@ module_exit(gcc_sdm845_exit); MODULE_DESCRIPTION("QTI GCC SDM845 Driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:gcc-sdm845"); +MODULE_SOFTDEP("pre: rpmhpd"); diff --git a/drivers/clk/qcom/mmcc-apq8084.c b/drivers/clk/qcom/mmcc-apq8084.c index fbfcf00067..c2fd0e8f4b 100644 --- a/drivers/clk/qcom/mmcc-apq8084.c +++ b/drivers/clk/qcom/mmcc-apq8084.c @@ -333,6 +333,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(333430000, P_MMPLL1, 3.5, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -357,6 +358,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(228570000, P_MMPLL0, 3.5, 0, 0), F(320000000, P_MMPLL0, 2.5, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { diff --git a/drivers/clk/qcom/mmcc-msm8974.c b/drivers/clk/qcom/mmcc-msm8974.c index a1552b6771..26509bcd4f 100644 --- a/drivers/clk/qcom/mmcc-msm8974.c +++ b/drivers/clk/qcom/mmcc-msm8974.c @@ -267,6 +267,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = { F(291750000, P_MMPLL1, 4, 0, 0), 
F(400000000, P_MMPLL0, 2, 0, 0), F(466800000, P_MMPLL1, 2.5, 0, 0), + { } }; static struct clk_rcg2 mmss_axi_clk_src = { @@ -291,6 +292,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = { F(150000000, P_GPLL0, 4, 0, 0), F(291750000, P_MMPLL1, 4, 0, 0), F(400000000, P_MMPLL0, 2, 0, 0), + { } }; static struct clk_rcg2 ocmemnoc_clk_src = { diff --git a/drivers/clk/qcom/mmcc-msm8998.c b/drivers/clk/qcom/mmcc-msm8998.c index a68764cfb7..5e2e60c1c2 100644 --- a/drivers/clk/qcom/mmcc-msm8998.c +++ b/drivers/clk/qcom/mmcc-msm8998.c @@ -2587,6 +2587,8 @@ static struct clk_hw *mmcc_msm8998_hws[] = { static struct gdsc video_top_gdsc = { .gdscr = 0x1024, + .cxcs = (unsigned int []){ 0x1028, 0x1034, 0x1038 }, + .cxc_count = 3, .pd = { .name = "video_top", }, @@ -2595,20 +2597,26 @@ static struct gdsc video_top_gdsc = { static struct gdsc video_subcore0_gdsc = { .gdscr = 0x1040, + .cxcs = (unsigned int []){ 0x1048 }, + .cxc_count = 1, .pd = { .name = "video_subcore0", }, .parent = &video_top_gdsc.pd, .pwrsts = PWRSTS_OFF_ON, + .flags = HW_CTRL, }; static struct gdsc video_subcore1_gdsc = { .gdscr = 0x1044, + .cxcs = (unsigned int []){ 0x104c }, + .cxc_count = 1, .pd = { .name = "video_subcore1", }, .parent = &video_top_gdsc.pd, .pwrsts = PWRSTS_OFF_ON, + .flags = HW_CTRL, }; static struct gdsc mdss_gdsc = { diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index c0800da2fa..736a781e40 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c @@ -1181,12 +1181,19 @@ static const u32 usb2_clk_regs[] = { SUN50I_H6_USB3_CLK_REG, }; +static struct ccu_mux_nb sun50i_h6_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, + .bypass_index = 0, /* index of 24 MHz oscillator */ +}; + static int sun50i_h6_ccu_probe(struct platform_device *pdev) { struct resource *res; void __iomem *reg; + int i, ret; u32 val; - int i; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = 
devm_ioremap_resource(&pdev->dev, res); @@ -1240,7 +1247,15 @@ static int sun50i_h6_ccu_probe(struct platform_device *pdev) val |= BIT(24); writel(val, reg + SUN50I_H6_HDMI_CEC_CLK_REG); - return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h6_ccu_desc); + ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h6_ccu_desc); + if (ret) + return ret; + + /* Reparent CPU during PLL CPUX rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_h6_cpu_nb); + + return 0; } static const struct of_device_id sun50i_h6_ccu_ids[] = { diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 44a61dc6f9..e1c773bb55 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -32,7 +32,7 @@ #define GT_CONTROL_IRQ_ENABLE BIT(2) /* banked */ #define GT_CONTROL_AUTO_INC BIT(3) /* banked */ #define GT_CONTROL_PRESCALER_SHIFT 8 -#define GT_CONTROL_PRESCALER_MAX 0xF +#define GT_CONTROL_PRESCALER_MAX 0xFF #define GT_CONTROL_PRESCALER_MASK (GT_CONTROL_PRESCALER_MAX << \ GT_CONTROL_PRESCALER_SHIFT) diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c index 9a1d146b7e..07d10373b9 100644 --- a/drivers/comedi/drivers/vmk80xx.c +++ b/drivers/comedi/drivers/vmk80xx.c @@ -642,33 +642,22 @@ static int vmk80xx_find_usb_endpoints(struct comedi_device *dev) struct vmk80xx_private *devpriv = dev->private; struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_host_interface *iface_desc = intf->cur_altsetting; - struct usb_endpoint_descriptor *ep_desc; - int i; - - if (iface_desc->desc.bNumEndpoints != 2) - return -ENODEV; - - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - ep_desc = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(ep_desc) || - usb_endpoint_is_bulk_in(ep_desc)) { - if (!devpriv->ep_rx) - devpriv->ep_rx = ep_desc; - continue; - } + struct usb_endpoint_descriptor *ep_rx_desc, *ep_tx_desc; + int ret; - if 
(usb_endpoint_is_int_out(ep_desc) || - usb_endpoint_is_bulk_out(ep_desc)) { - if (!devpriv->ep_tx) - devpriv->ep_tx = ep_desc; - continue; - } - } + if (devpriv->model == VMK8061_MODEL) + ret = usb_find_common_endpoints(iface_desc, &ep_rx_desc, + &ep_tx_desc, NULL, NULL); + else + ret = usb_find_common_endpoints(iface_desc, NULL, NULL, + &ep_rx_desc, &ep_tx_desc); - if (!devpriv->ep_rx || !devpriv->ep_tx) + if (ret) return -ENODEV; + devpriv->ep_rx = ep_rx_desc; + devpriv->ep_tx = ep_tx_desc; + if (!usb_endpoint_maxp(devpriv->ep_rx) || !usb_endpoint_maxp(devpriv->ep_tx)) return -EINVAL; diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 38ec0fedb2..552db816ed 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,10 +481,11 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct private_data *priv; + if (!policy) return 0; - struct private_data *priv = policy->driver_data; - + priv = policy->driver_data; cpufreq_cpu_put(policy); return brcm_avs_get_frequency(priv->base); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index e0ff09d66c..17cfa2b92e 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -615,10 +615,15 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) { struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct cppc_cpudata *cpu_data = policy->driver_data; + struct cppc_cpudata *cpu_data; u64 delivered_perf; int ret; + if (!policy) + return -ENODEV; + + cpu_data = policy->driver_data; + cpufreq_cpu_put(policy); ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0); @@ -697,10 +702,15 @@ static struct cpufreq_driver cppc_cpufreq_driver = { static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int 
cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct cppc_cpudata *cpu_data = policy->driver_data; + struct cppc_cpudata *cpu_data; u64 desired_perf; int ret; + if (!policy) + return -ENODEV; + + cpu_data = policy->driver_data; + cpufreq_cpu_put(policy); ret = cppc_get_desired_perf(cpu, &desired_perf); diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 8fcaba5415..1bfdfa7e25 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -208,7 +208,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) if (!priv) return -ENOMEM; - if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&priv->cpus, GFP_KERNEL)) return -ENOMEM; cpumask_set_cpu(cpu, priv->cpus); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c2227be7ba..a7bbe6f28b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1572,47 +1572,36 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; } -static int cpufreq_offline(unsigned int cpu) +static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy) { - struct cpufreq_policy *policy; int ret; - pr_debug("%s: unregistering CPU %u\n", __func__, cpu); - - policy = cpufreq_cpu_get_raw(cpu); - if (!policy) { - pr_debug("%s: No cpu_data found\n", __func__); - return 0; - } - - down_write(&policy->rwsem); if (has_target()) cpufreq_stop_governor(policy); cpumask_clear_cpu(cpu, policy->cpus); - if (policy_is_inactive(policy)) { - if (has_target()) - strncpy(policy->last_governor, policy->governor->name, - CPUFREQ_NAME_LEN); - else - policy->last_policy = policy->policy; - } else if (cpu == policy->cpu) { - /* Nominate new CPU */ - policy->cpu = cpumask_any(policy->cpus); - } - - /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { + /* Nominate a new CPU if necessary. 
*/ + if (cpu == policy->cpu) + policy->cpu = cpumask_any(policy->cpus); + + /* Start the governor again for the active policy. */ if (has_target()) { ret = cpufreq_start_governor(policy); if (ret) pr_err("%s: Failed to start governor\n", __func__); } - goto unlock; + return; } + if (has_target()) + strncpy(policy->last_governor, policy->governor->name, + CPUFREQ_NAME_LEN); + else + policy->last_policy = policy->policy; + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { cpufreq_cooling_unregister(policy->cdev); policy->cdev = NULL; @@ -1627,12 +1616,31 @@ static int cpufreq_offline(unsigned int cpu) */ if (cpufreq_driver->offline) { cpufreq_driver->offline(policy); - } else if (cpufreq_driver->exit) { + return; + } + + if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - policy->freq_table = NULL; + + policy->freq_table = NULL; +} + +static int cpufreq_offline(unsigned int cpu) +{ + struct cpufreq_policy *policy; + + pr_debug("%s: unregistering CPU %u\n", __func__, cpu); + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) { + pr_debug("%s: No cpu_data found\n", __func__); + return 0; } -unlock: + down_write(&policy->rwsem); + + __cpufreq_offline(cpu, policy); + up_write(&policy->rwsem); return 0; } @@ -1650,19 +1658,26 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; + down_write(&policy->rwsem); + if (cpu_online(cpu)) - cpufreq_offline(cpu); + __cpufreq_offline(cpu, policy); cpumask_clear_cpu(cpu, policy->real_cpus); remove_cpu_dev_symlink(policy, dev); - if (cpumask_empty(policy->real_cpus)) { - /* We did light-weight exit earlier, do full tear down now */ - if (cpufreq_driver->offline) - cpufreq_driver->exit(policy); - - cpufreq_policy_free(policy); + if (!cpumask_empty(policy->real_cpus)) { + up_write(&policy->rwsem); + return; } + + /* We did light-weight exit earlier, do full tear down now */ + if (cpufreq_driver->offline && cpufreq_driver->exit) + cpufreq_driver->exit(policy); + + 
up_write(&policy->rwsem); + + cpufreq_policy_free(policy); } /** diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index f70aa17e2a..c594e28add 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -185,7 +186,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) s->target_residency_ns = 0; if (s->exit_latency > 0) - s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; + s->exit_latency_ns = mul_u32_u32(s->exit_latency, NSEC_PER_USEC); else if (s->exit_latency_ns < 0) s->exit_latency_ns = 0; } diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c index 07989bb8c2..3fdc64b5a6 100644 --- a/drivers/crypto/bcm/spu2.c +++ b/drivers/crypto/bcm/spu2.c @@ -495,7 +495,7 @@ static void spu2_dump_omd(u8 *omd, u16 hash_key_len, u16 ciph_key_len, if (hash_iv_len) { packet_log(" Hash IV Length %u bytes\n", hash_iv_len); packet_dump(" hash IV: ", ptr, hash_iv_len); - ptr += ciph_key_len; + ptr += hash_iv_len; } if (ciph_iv_len) { diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c index 9dba52fbee..121f9d0cb6 100644 --- a/drivers/crypto/ccp/sp-platform.c +++ b/drivers/crypto/ccp/sp-platform.c @@ -39,44 +39,38 @@ static const struct sp_dev_vdata dev_vdata[] = { }, }; -#ifdef CONFIG_ACPI static const struct acpi_device_id sp_acpi_match[] = { { "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] }, { }, }; MODULE_DEVICE_TABLE(acpi, sp_acpi_match); -#endif -#ifdef CONFIG_OF static const struct of_device_id sp_of_match[] = { { .compatible = "amd,ccp-seattle-v1a", .data = (const void *)&dev_vdata[0] }, { }, }; MODULE_DEVICE_TABLE(of, sp_of_match); -#endif static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev) { -#ifdef CONFIG_OF const struct of_device_id *match; match = of_match_node(sp_of_match, pdev->dev.of_node); if (match && match->data) return (struct sp_dev_vdata *)match->data; -#endif + return NULL; } 
static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev) { -#ifdef CONFIG_ACPI const struct acpi_device_id *match; match = acpi_match_device(sp_acpi_match, &pdev->dev); if (match && match->driver_data) return (struct sp_dev_vdata *)match->driver_data; -#endif + return NULL; } @@ -222,12 +216,8 @@ static int sp_platform_resume(struct platform_device *pdev) static struct platform_driver sp_platform_driver = { .driver = { .name = "ccp", -#ifdef CONFIG_ACPI .acpi_match_table = sp_acpi_match, -#endif -#ifdef CONFIG_OF .of_match_table = sp_of_match, -#endif }, .probe = sp_platform_probe, .remove = sp_platform_remove, diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index ed3e40bc56..56c45c3408 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -95,7 +95,8 @@ static void adf_device_reset_worker(struct work_struct *work) if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - kfree(reset_data); + if (reset_data->mode == ADF_DEV_RESET_ASYNC) + kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; } @@ -103,10 +104,10 @@ static void adf_device_reset_worker(struct work_struct *work) clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); /* The dev is back alive. 
Notify the caller if in sync mode */ - if (reset_data->mode == ADF_DEV_RESET_SYNC) - complete(&reset_data->compl); - else + if (reset_data->mode == ADF_DEV_RESET_ASYNC) kfree(reset_data); + else + complete(&reset_data->compl); } static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, @@ -138,6 +139,7 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, if (!timeout) { dev_err(&GET_DEV(accel_dev), "Reset device timeout expired\n"); + cancel_work_sync(&reset_data->reset_work); ret = -EFAULT; } kfree(reset_data); diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 101394f169..237bce21d1 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -110,12 +110,12 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) seq_printf(s, "%s: %d\n", obj->name, obj->value); - spin_lock_irq(&obj->lock); + spin_lock(&obj->lock); /* Caller already disabled IRQ. */ list_for_each(pos, &obj->pt_list) { struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } - spin_unlock_irq(&obj->lock); + spin_unlock(&obj->lock); } static void sync_print_sync_file(struct seq_file *s, diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index f4c07ad3be..89e4a3e1d5 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -167,6 +167,10 @@ static irqreturn_t idma64_irq(int irq, void *dev) u32 status_err; unsigned short i; + /* Since IRQ may be shared, check if DMA controller is powered on */ + if (status == GENMASK(31, 0)) + return IRQ_NONE; + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); /* Check if we have any interrupt from the DMA controller */ @@ -590,7 +594,9 @@ static int idma64_probe(struct idma64_chip *chip) idma64->dma.dev = chip->sysdev; - dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK); + ret = dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK); + if (ret) + return ret; ret = 
dma_async_device_register(&idma64->dma); if (ret) diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c index d73004f47c..612ef13b71 100644 --- a/drivers/dma/idxd/perfmon.c +++ b/drivers/dma/idxd/perfmon.c @@ -529,14 +529,11 @@ static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; target = cpumask_any_but(cpu_online_mask, cpu); - /* migrate events if there is a valid target */ - if (target < nr_cpu_ids) + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); - else - target = -1; - - perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + } return 0; } diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c index 1f0bbaed46..9739c6c621 100644 --- a/drivers/dma/owl-dma.c +++ b/drivers/dma/owl-dma.c @@ -249,7 +249,7 @@ static void pchan_update(struct owl_dma_pchan *pchan, u32 reg, else regval &= ~val; - writel(val, pchan->base + reg); + writel(regval, pchan->base + reg); } static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data) @@ -273,7 +273,7 @@ static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state) else regval &= ~val; - writel(val, od->base + reg); + writel(regval, od->base + reg); } static void dma_writel(struct owl_dma *od, u32 reg, u32 data) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index ce5c66e689..6eb68d986a 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -213,7 +213,8 @@ struct xilinx_dpdma_tx_desc { * @running: true if the channel is running * @first_frame: flag for the first frame of stream * @video_group: flag if multi-channel operation is needed for video channels - * @lock: lock to access struct xilinx_dpdma_chan + * @lock: lock to access struct xilinx_dpdma_chan. Must be taken before + * @vchan.lock, if both are to be held. 
* @desc_pool: descriptor allocation pool * @err_task: error IRQ bottom half handler * @desc: References to descriptors being processed @@ -1096,12 +1097,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) * Complete the active descriptor, if any, promote the pending * descriptor to active, and queue the next transfer, if any. */ + spin_lock(&chan->vchan.lock); if (chan->desc.active) vchan_cookie_complete(&chan->desc.active->vdesc); chan->desc.active = pending; chan->desc.pending = NULL; xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); out: spin_unlock_irqrestore(&chan->lock, flags); @@ -1263,10 +1266,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan) struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); unsigned long flags; - spin_lock_irqsave(&chan->vchan.lock, flags); + spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); if (vchan_issue_pending(&chan->vchan)) xilinx_dpdma_chan_queue_transfer(chan); - spin_unlock_irqrestore(&chan->vchan.lock, flags); + spin_unlock(&chan->vchan.lock); + spin_unlock_irqrestore(&chan->lock, flags); } static int xilinx_dpdma_config(struct dma_chan *dchan, @@ -1490,7 +1495,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t) XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id); spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); spin_unlock_irqrestore(&chan->lock, flags); } diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 8ec70da8d8..c46880a934 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -627,7 +627,7 @@ static int errcmd_enable_error_reporting(bool enable) rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); if (rc) - return rc; + return pcibios_err_to_errno(rc); if (enable) errcmd |= ERRCMD_CE | ERRSTS_UE; @@ -636,7 +636,7 @@ static int errcmd_enable_error_reporting(bool enable) rc 
= pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); if (rc) - return rc; + return pcibios_err_to_errno(rc); return 0; } diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 7684b3afa6..ba0542ef4d 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -113,7 +113,8 @@ config EXTCON_MAX77843 config EXTCON_MAX8997 tristate "Maxim MAX8997 EXTCON Support" - depends on MFD_MAX8997 && IRQ_DOMAIN + depends on MFD_MAX8997 + select IRQ_DOMAIN help If you say yes here you get support for the MUIC device of Maxim MAX8997 PMIC. The MAX8997 MUIC is a USB port accessory diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c index b0d671db17..ea31ac7ac1 100644 --- a/drivers/firewire/nosy.c +++ b/drivers/firewire/nosy.c @@ -148,10 +148,12 @@ packet_buffer_get(struct client *client, char __user *data, size_t user_length) if (atomic_read(&buffer->size) == 0) return -ENODEV; - /* FIXME: Check length <= user_length. */ + length = buffer->head->length; + + if (length > user_length) + return 0; end = buffer->data + buffer->capacity; - length = buffer->head->length; if (&buffer->head->data[length] < end) { if (copy_to_user(data, buffer->head->data, length)) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 667ff40f39..7d94e1cbc0 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2049,6 +2049,8 @@ static void bus_reset_work(struct work_struct *work) ohci->generation = generation; reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset); + if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS) + reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset); if (ohci->quirks & QUIRK_RESET_PACKET) ohci->request_generation = generation; @@ -2115,12 +2117,14 @@ static irqreturn_t irq_handler(int irq, void *data) return IRQ_NONE; /* - * busReset and postedWriteErr must not be cleared yet + * busReset and postedWriteErr events must not be cleared yet * (OHCI 1.1 clauses 7.2.3.2 and 13.2.8.1) */ reg_write(ohci, 
OHCI1394_IntEventClear, event & ~(OHCI1394_busReset | OHCI1394_postedWriteErr)); log_irqs(ohci, event); + if (event & OHCI1394_busReset) + reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset); if (event & OHCI1394_selfIDComplete) queue_work(selfid_workqueue, &ohci->bus_reset_work); diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c index 940ddf9162..77a8d43e65 100644 --- a/drivers/firmware/dmi-id.c +++ b/drivers/firmware/dmi-id.c @@ -169,9 +169,14 @@ static int dmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static void dmi_dev_release(struct device *dev) +{ + kfree(dev); +} + static struct class dmi_class = { .name = "dmi", - .dev_release = (void(*)(struct device *)) kfree, + .dev_release = dmi_dev_release, .dev_uevent = dmi_dev_uevent, }; diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index cae590bd08..eaed1ddcc8 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -415,7 +415,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head) { const struct efivar_operations *ops; - unsigned long variable_name_size = 1024; + unsigned long variable_name_size = 512; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; @@ -438,12 +438,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), } /* - * Per EFI spec, the maximum storage allocated for both - * the variable name and variable data is 1024 bytes. + * A small set of old UEFI implementations reject sizes + * above a certain threshold, the lowest seen in the wild + * is 512. 
*/ do { - variable_name_size = 1024; + variable_name_size = 512; status = ops->get_next_variable(&variable_name_size, variable_name, @@ -491,9 +492,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), break; case EFI_NOT_FOUND: break; + case EFI_BUFFER_TOO_SMALL: + pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", + variable_name_size); + status = EFI_NOT_FOUND; + break; default: - printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", - status); + pr_warn("efivars: get_next_variable: status=%lx\n", status); status = EFI_NOT_FOUND; break; } diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c index dba315f675..ec223976c9 100644 --- a/drivers/firmware/raspberrypi.c +++ b/drivers/firmware/raspberrypi.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -96,8 +97,8 @@ int rpi_firmware_property_list(struct rpi_firmware *fw, if (size & 3) return -EINVAL; - buf = dma_alloc_coherent(fw->cl.dev, PAGE_ALIGN(size), &bus_addr, - GFP_ATOMIC); + buf = dma_alloc_coherent(fw->chan->mbox->dev, PAGE_ALIGN(size), + &bus_addr, GFP_ATOMIC); if (!buf) return -ENOMEM; @@ -125,7 +126,7 @@ int rpi_firmware_property_list(struct rpi_firmware *fw, ret = -EINVAL; } - dma_free_coherent(fw->cl.dev, PAGE_ALIGN(size), buf, bus_addr); + dma_free_coherent(fw->chan->mbox->dev, PAGE_ALIGN(size), buf, bus_addr); return ret; } diff --git a/drivers/fpga/dfl-fme-region.c b/drivers/fpga/dfl-fme-region.c index 1eeb42af10..4aebde0a7f 100644 --- a/drivers/fpga/dfl-fme-region.c +++ b/drivers/fpga/dfl-fme-region.c @@ -30,6 +30,7 @@ static int fme_region_get_bridges(struct fpga_region *region) static int fme_region_probe(struct platform_device *pdev) { struct dfl_fme_region_pdata *pdata = dev_get_platdata(&pdev->dev); + struct fpga_region_info info = { 0 }; struct device *dev = &pdev->dev; struct fpga_region *region; struct fpga_manager *mgr; @@ -39,20 +40,18 @@ static int fme_region_probe(struct 
platform_device *pdev) if (IS_ERR(mgr)) return -EPROBE_DEFER; - region = devm_fpga_region_create(dev, mgr, fme_region_get_bridges); - if (!region) { - ret = -ENOMEM; + info.mgr = mgr; + info.compat_id = mgr->compat_id; + info.get_bridges = fme_region_get_bridges; + info.priv = pdata; + region = fpga_region_register_full(dev, &info); + if (IS_ERR(region)) { + ret = PTR_ERR(region); goto eprobe_mgr_put; } - region->priv = pdata; - region->compat_id = mgr->compat_id; platform_set_drvdata(pdev, region); - ret = fpga_region_register(region); - if (ret) - goto eprobe_mgr_put; - dev_dbg(dev, "DFL FME FPGA Region probed\n"); return 0; diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c index c38143ef23..071c25c164 100644 --- a/drivers/fpga/dfl.c +++ b/drivers/fpga/dfl.c @@ -1407,19 +1407,15 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info) if (!cdev) return ERR_PTR(-ENOMEM); - cdev->region = devm_fpga_region_create(info->dev, NULL, NULL); - if (!cdev->region) { - ret = -ENOMEM; - goto free_cdev_exit; - } - cdev->parent = info->dev; mutex_init(&cdev->lock); INIT_LIST_HEAD(&cdev->port_dev_list); - ret = fpga_region_register(cdev->region); - if (ret) + cdev->region = fpga_region_register(info->dev, NULL, NULL); + if (IS_ERR(cdev->region)) { + ret = PTR_ERR(cdev->region); goto free_cdev_exit; + } /* create and init build info for enumeration */ binfo = devm_kzalloc(info->dev, sizeof(*binfo), GFP_KERNEL); diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c index a483871522..d73daea579 100644 --- a/drivers/fpga/fpga-region.c +++ b/drivers/fpga/fpga-region.c @@ -52,7 +52,7 @@ static struct fpga_region *fpga_region_get(struct fpga_region *region) } get_device(dev); - if (!try_module_get(dev->parent->driver->owner)) { + if (!try_module_get(region->ops_owner)) { put_device(dev); mutex_unlock(®ion->mutex); return ERR_PTR(-ENODEV); @@ -74,7 +74,7 @@ static void fpga_region_put(struct fpga_region *region) dev_dbg(dev, "put\n"); - 
module_put(dev->parent->driver->owner); + module_put(region->ops_owner); put_device(dev); mutex_unlock(®ion->mutex); } @@ -180,39 +180,45 @@ static struct attribute *fpga_region_attrs[] = { ATTRIBUTE_GROUPS(fpga_region); /** - * fpga_region_create - alloc and init a struct fpga_region + * __fpga_region_register_full - create and register an FPGA Region device * @parent: device parent - * @mgr: manager that programs this region - * @get_bridges: optional function to get bridges to a list - * - * The caller of this function is responsible for freeing the resulting region - * struct with fpga_region_free(). Using devm_fpga_region_create() instead is - * recommended. + * @info: parameters for FPGA Region + * @owner: module containing the get_bridges function * - * Return: struct fpga_region or NULL + * Return: struct fpga_region or ERR_PTR() */ -struct fpga_region -*fpga_region_create(struct device *parent, - struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)) +struct fpga_region * +__fpga_region_register_full(struct device *parent, const struct fpga_region_info *info, + struct module *owner) { struct fpga_region *region; int id, ret = 0; + if (!info) { + dev_err(parent, + "Attempt to register without required info structure\n"); + return ERR_PTR(-EINVAL); + } + region = kzalloc(sizeof(*region), GFP_KERNEL); if (!region) - return NULL; + return ERR_PTR(-ENOMEM); id = ida_simple_get(&fpga_region_ida, 0, 0, GFP_KERNEL); - if (id < 0) + if (id < 0) { + ret = id; goto err_free; + } + + region->mgr = info->mgr; + region->compat_id = info->compat_id; + region->priv = info->priv; + region->get_bridges = info->get_bridges; + region->ops_owner = owner; - region->mgr = mgr; - region->get_bridges = get_bridges; mutex_init(®ion->mutex); INIT_LIST_HEAD(®ion->bridge_list); - device_initialize(®ion->dev); region->dev.class = fpga_region_class; region->dev.parent = parent; region->dev.of_node = parent->of_node; @@ -222,6 +228,12 @@ struct fpga_region if (ret) goto 
err_remove; + ret = device_register(®ion->dev); + if (ret) { + put_device(®ion->dev); + return ERR_PTR(ret); + } + return region; err_remove: @@ -229,78 +241,35 @@ struct fpga_region err_free: kfree(region); - return NULL; -} -EXPORT_SYMBOL_GPL(fpga_region_create); - -/** - * fpga_region_free - free an FPGA region created by fpga_region_create() - * @region: FPGA region - */ -void fpga_region_free(struct fpga_region *region) -{ - ida_simple_remove(&fpga_region_ida, region->dev.id); - kfree(region); -} -EXPORT_SYMBOL_GPL(fpga_region_free); - -static void devm_fpga_region_release(struct device *dev, void *res) -{ - struct fpga_region *region = *(struct fpga_region **)res; - - fpga_region_free(region); + return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(__fpga_region_register_full); /** - * devm_fpga_region_create - create and initialize a managed FPGA region struct + * __fpga_region_register - create and register an FPGA Region device * @parent: device parent * @mgr: manager that programs this region * @get_bridges: optional function to get bridges to a list + * @owner: module containing the get_bridges function * - * This function is intended for use in an FPGA region driver's probe function. - * After the region driver creates the region struct with - * devm_fpga_region_create(), it should register it with fpga_region_register(). - * The region driver's remove function should call fpga_region_unregister(). - * The region struct allocated with this function will be freed automatically on - * driver detach. This includes the case of a probe function returning error - * before calling fpga_region_register(), the struct will still get cleaned up. + * This simple version of the register function should be sufficient for most users. + * The fpga_region_register_full() function is available for users that need to + * pass additional, optional parameters. 
* - * Return: struct fpga_region or NULL + * Return: struct fpga_region or ERR_PTR() */ -struct fpga_region -*devm_fpga_region_create(struct device *parent, - struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)) +struct fpga_region * +__fpga_region_register(struct device *parent, struct fpga_manager *mgr, + int (*get_bridges)(struct fpga_region *), struct module *owner) { - struct fpga_region **ptr, *region; - - ptr = devres_alloc(devm_fpga_region_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; + struct fpga_region_info info = { 0 }; - region = fpga_region_create(parent, mgr, get_bridges); - if (!region) { - devres_free(ptr); - } else { - *ptr = region; - devres_add(parent, ptr); - } + info.mgr = mgr; + info.get_bridges = get_bridges; - return region; + return __fpga_region_register_full(parent, &info, owner); } -EXPORT_SYMBOL_GPL(devm_fpga_region_create); - -/** - * fpga_region_register - register an FPGA region - * @region: FPGA region - * - * Return: 0 or -errno - */ -int fpga_region_register(struct fpga_region *region) -{ - return device_add(®ion->dev); -} -EXPORT_SYMBOL_GPL(fpga_region_register); +EXPORT_SYMBOL_GPL(__fpga_region_register); /** * fpga_region_unregister - unregister an FPGA region @@ -316,6 +285,10 @@ EXPORT_SYMBOL_GPL(fpga_region_unregister); static void fpga_region_dev_release(struct device *dev) { + struct fpga_region *region = to_fpga_region(dev); + + ida_simple_remove(&fpga_region_ida, region->dev.id); + kfree(region); } /** diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c index e3c25576b6..9c662db1c5 100644 --- a/drivers/fpga/of-fpga-region.c +++ b/drivers/fpga/of-fpga-region.c @@ -405,16 +405,12 @@ static int of_fpga_region_probe(struct platform_device *pdev) if (IS_ERR(mgr)) return -EPROBE_DEFER; - region = devm_fpga_region_create(dev, mgr, of_fpga_region_get_bridges); - if (!region) { - ret = -ENOMEM; + region = fpga_region_register(dev, mgr, of_fpga_region_get_bridges); + if 
(IS_ERR(region)) { + ret = PTR_ERR(region); goto eprobe_mgr_put; } - ret = fpga_region_register(region); - if (ret) - goto eprobe_mgr_put; - of_platform_populate(np, fpga_region_of_match, NULL, ®ion->dev); platform_set_drvdata(pdev, region); diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c index 5a909f3c79..c48a82c240 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -91,7 +91,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type) case 0x5e: return GPIOPANELCTL; default: - return -EOPNOTSUPP; + return -ENOTSUPP; } } diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c index 16a0fae1e3..2df948e16e 100644 --- a/drivers/gpio/gpio-wcove.c +++ b/drivers/gpio/gpio-wcove.c @@ -104,7 +104,7 @@ static inline int to_reg(int gpio, enum ctrl_register type) unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE; if (gpio >= WCOVE_GPIO_NUM) - return -EOPNOTSUPP; + return -ENOTSUPP; return reg + gpio; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 00a8aef48a..1fae36e334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1502,6 +1502,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( err_bo_create: unreserve_mem_limit(adev, size, alloc_domain, !!sg); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 97178b307e..2229c6e75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -149,6 +149,7 @@ union igp_info { struct atom_integrated_system_info_v1_11 v11; struct atom_integrated_system_info_v1_12 v12; struct atom_integrated_system_info_v2_1 v21; + struct 
atom_integrated_system_info_v2_3 v23; }; union umc_info { @@ -283,6 +284,20 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; + case 3: + mem_channel_number = igp_info->v23.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + mem_type = igp_info->v23.memorytype; + if (mem_type == LpDdr5MemType) + mem_channel_width = 32; + else + mem_channel_width = 64; + if (vram_width) + *vram_width = mem_channel_number * mem_channel_width; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index c777aff164..654f99f410 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -625,7 +625,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder, if (mode->type & DRM_MODE_TYPE_PREFERRED) { if (mode->hdisplay != native_mode->hdisplay || mode->vdisplay != native_mode->vdisplay) - memcpy(native_mode, mode, sizeof(*mode)); + drm_mode_copy(native_mode, mode); } } @@ -634,7 +634,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder, list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { if (mode->hdisplay == native_mode->hdisplay && mode->vdisplay == native_mode->vdisplay) { - *native_mode = *mode; + drm_mode_copy(native_mode, mode); drm_mode_set_crtcinfo(native_mode, CRTC_INTERLACE_HALVE_V); DRM_DEBUG_KMS("Determined LVDS native mode details from EDID\n"); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 222a1d9ecf..5f6c32ec67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2487,6 +2487,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto 
init_failed; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -2525,10 +2529,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.pending_reset) amdgpu_amdkfd_device_init(adev); - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - goto init_failed; - amdgpu_fru_get_product_info(adev); init_failed: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 439ea256ed..c963b87014 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -820,6 +820,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, if (!obj) return -EINVAL; + if (!info || info->head.block == AMDGPU_RAS_BLOCK_COUNT) + return -EINVAL; + switch (info->head.block) { case AMDGPU_RAS_BLOCK__UMC: if (adev->umc.ras_funcs && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b06fb1fa41..9a1b19e3d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -859,6 +859,7 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); } + gtt->bound = true; gart_bind_fail: if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0e4554950e..b2192b2169 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2306,6 +2306,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & 
AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -2332,21 +2363,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -2399,17 +2423,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + 
return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -2423,7 +2439,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -2510,10 +2526,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 0fad9258e0..c189e7ae68 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2152,6 +2152,9 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + return instance; + switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 3ffbd30180..3ed9e8ed35 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -404,17 +404,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - 
amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 6688129df2..88f9e1aa51 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -940,8 +940,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, * nodes, but not more than args->num_of_nodes as that is * the amount of memory allocated by user */ - pa = kzalloc((sizeof(struct kfd_process_device_apertures) * - args->num_of_nodes), GFP_KERNEL); + pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures), + GFP_KERNEL); if (!pa) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 21ec8a18ca..7f69031f2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -818,6 +818,14 @@ struct kfd_process *kfd_create_process(struct file *filep) if (process) { pr_debug("Process already found\n"); } else { + /* If the process just called exec(3), it is possible that the + * cleanup of the kfd_process (following the release of the mm + * of the old process image) is still in the cleanup work queue. + * Make sure to drain any job before trying to recreate any + * resource for this process. + */ + flush_workqueue(kfd_process_wq); + process = create_process(thread); if (IS_ERR(process)) goto out; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7385efe699..b821abb56a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2772,6 +2772,7 @@ static int dm_resume(void *handle) dc_stream_release(dm_new_crtc_state->stream); dm_new_crtc_state->stream = NULL; } + dm_new_crtc_state->base.color_mgmt_changed = true; } for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) { @@ -6219,7 +6220,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, } } - aconnector->freesync_vid_base = *m_pref; + drm_mode_copy(&aconnector->freesync_vid_base, m_pref); return m_pref; } @@ -6333,8 +6334,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, is_freesync_video_mode(&mode, aconnector); if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); - saved_mode = mode; - mode = *freesync_mode; + drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; + drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 
228f098e5d..6bc8c6bee4 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2303,6 +2303,7 @@ static enum bp_result construct_integrated_info( result = get_integrated_info_v2_1(bp, info); break; case 2: + case 3: result = get_integrated_info_v2_2(bp, info); break; default: diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 7a00fe525d..bd9bc51983 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -379,6 +379,11 @@ bool cm_helper_translate_curve_to_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) { + DC_LOG_ERROR("Index out of bounds: i=%d, TRANSFER_FUNC_POINTS=%d\n", + i, TRANSFER_FUNC_POINTS); + return false; + } rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index f834573758..8d8114ee67 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -673,10 +673,20 @@ void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) if (pipe_ctx == NULL) return; - if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) + if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) { pipe_ctx->stream_res.stream_enc->funcs->set_avmute( pipe_ctx->stream_res.stream_enc, enable); + + /* Wait for two frame to make sure AV mute is sent out */ + if (enable) { + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + 
pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK); + pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + } + } } void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index f5b7da0e64..c0b860ef2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -939,7 +939,12 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - dsc_cfg->num_slices_v = pic_height/slice_height; + if (slice_height > 0) { + dsc_cfg->num_slices_v = pic_height / slice_height; + } else { + is_dsc_possible = false; + goto done; + } if (target_bandwidth_kbps > 0) { is_dsc_possible = decide_dsc_target_bpp_x16( diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index be61975f14..11939be2b4 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -509,6 +509,9 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp) hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd->in_msg.hdcp2_create_session_v2.display_handle = display->index; if (hdcp->connection.link.adjust.hdcp2.force_type == MOD_HDCP_FORCE_TYPE_0) diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h index 4220fd8fdd..54cd86060f 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h @@ -57,10 +57,10 @@ void mod_stats_update_event(struct mod_stats 
*mod_stats, unsigned int length); void mod_stats_update_flip(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns); + unsigned long long timestamp_in_ns); void mod_stats_update_vupdate(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns); + unsigned long long timestamp_in_ns); void mod_stats_update_freesync(struct mod_stats *mod_stats, unsigned int v_total_min, diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 44955458fe..d6f0f31de5 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1467,6 +1467,49 @@ struct atom_integrated_system_info_v2_2 uint32_t reserved4[189]; }; +struct uma_carveout_option { + char optionName[29]; //max length of string is 28chars + '\0'. Current design is for "minimum", "Medium", "High". This makes entire struct size 64bits + uint8_t memoryCarvedGb; //memory carved out with setting + uint8_t memoryRemainingGb; //memory remaining on system + union { + struct _flags { + uint8_t Auto : 1; + uint8_t Custom : 1; + uint8_t Reserved : 6; + } flags; + uint8_t all8; + } uma_carveout_option_flags; +}; + +struct atom_integrated_system_info_v2_3 { + struct atom_common_table_header table_header; + uint32_t vbios_misc; // enum of atom_system_vbiosmisc_def + uint32_t gpucapinfo; // enum of atom_system_gpucapinf_def + uint32_t system_config; + uint32_t cpucapinfo; + uint16_t gpuclk_ss_percentage; // unit of 0.001%, 1000 mean 1% + uint16_t gpuclk_ss_type; + uint16_t dpphy_override; // bit vector, enum of atom_sysinfo_dpphy_override_def + uint8_t memorytype; // enum of atom_dmi_t17_mem_type_def, APU memory type indication. 
+ uint8_t umachannelnumber; // number of memory channels + uint8_t htc_hyst_limit; + uint8_t htc_tmp_limit; + uint8_t reserved1; // dp_ss_control + uint8_t gpu_package_id; + struct edp_info_table edp1_info; + struct edp_info_table edp2_info; + uint32_t reserved2[8]; + struct atom_external_display_connection_info extdispconninfo; + uint8_t UMACarveoutVersion; + uint8_t UMACarveoutIndexMax; + uint8_t UMACarveoutTypeDefault; + uint8_t UMACarveoutIndexDefault; + uint8_t UMACarveoutType; //Auto or Custom + uint8_t UMACarveoutIndex; + struct uma_carveout_option UMASizeControlOption[20]; + uint8_t reserved3[110]; +}; + // system_config enum atom_system_vbiosmisc_def{ INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT = 0x01, diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c index f5847a79dd..0720b2197c 100644 --- a/drivers/gpu/drm/arm/malidp_mw.c +++ b/drivers/gpu/drm/arm/malidp_mw.c @@ -70,7 +70,10 @@ static void malidp_mw_connector_reset(struct drm_connector *connector) __drm_atomic_helper_connector_destroy_state(connector->state); kfree(connector->state); - __drm_atomic_helper_connector_reset(connector, &mw_state->base); + connector->state = NULL; + + if (mw_state) + __drm_atomic_helper_connector_reset(connector, &mw_state->base); } static enum drm_connector_status diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index 5530fbf64f..c8386311cc 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -2040,6 +2040,9 @@ static void cdns_mhdp_atomic_enable(struct drm_bridge *bridge, mhdp_state = to_cdns_mhdp_bridge_state(new_state); mhdp_state->current_mode = drm_mode_duplicate(bridge->dev, mode); + if (!mhdp_state->current_mode) + return; + drm_mode_set_name(mhdp_state->current_mode); dev_dbg(mhdp->dev, "%s: Enabling mode %s\n", __func__, mode->name); diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c 
b/drivers/gpu/drm/bridge/lontium-lt8912b.c index e16b0fc0cd..6379d5c8ed 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -475,10 +475,8 @@ static int lt8912_attach_dsi(struct lt8912 *lt) }; host = of_find_mipi_dsi_host_by_node(lt->host_node); - if (!host) { - dev_err(dev, "failed to find dsi host\n"); - return -EPROBE_DEFER; - } + if (!host) + return dev_err_probe(dev, -EPROBE_DEFER, "failed to find dsi host\n"); dsi = devm_mipi_dsi_device_register_full(dev, host, &info); if (IS_ERR(dsi)) { diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index 660e05fa4a..7f58ceda5b 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -766,10 +766,8 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611, int ret; host = of_find_mipi_dsi_host_by_node(dsi_node); - if (!host) { - dev_err(lt9611->dev, "failed to find dsi host\n"); - return ERR_PTR(-EPROBE_DEFER); - } + if (!host) + return ERR_PTR(dev_err_probe(lt9611->dev, -EPROBE_DEFER, "failed to find dsi host\n")); dsi = mipi_dsi_device_register_full(host, &info); if (IS_ERR(dsi)) { diff --git a/drivers/gpu/drm/bridge/tc358775.c b/drivers/gpu/drm/bridge/tc358775.c index 2272adcc5b..2e299cfe4e 100644 --- a/drivers/gpu/drm/bridge/tc358775.c +++ b/drivers/gpu/drm/bridge/tc358775.c @@ -453,10 +453,6 @@ static void tc_bridge_enable(struct drm_bridge *bridge) dev_dbg(tc->dev, "bus_formats %04x bpc %d\n", connector->display_info.bus_formats[0], tc->bpc); - /* - * Default hardware register settings of tc358775 configured - * with MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA jeida-24 format - */ if (connector->display_info.bus_formats[0] == MEDIA_BUS_FMT_RGB888_1X7X4_SPWG) { /* VESA-24 */ @@ -467,14 +463,15 @@ static void tc_bridge_enable(struct drm_bridge *bridge) d2l_write(tc->i2c, LV_MX1619, LV_MX(LVI_B6, LVI_B7, LVI_B1, LVI_B2)); d2l_write(tc->i2c, LV_MX2023, LV_MX(LVI_B3, LVI_B4, 
LVI_B5, LVI_L0)); d2l_write(tc->i2c, LV_MX2427, LV_MX(LVI_HS, LVI_VS, LVI_DE, LVI_R6)); - } else { /* MEDIA_BUS_FMT_RGB666_1X7X3_SPWG - JEIDA-18 */ - d2l_write(tc->i2c, LV_MX0003, LV_MX(LVI_R0, LVI_R1, LVI_R2, LVI_R3)); - d2l_write(tc->i2c, LV_MX0407, LV_MX(LVI_R4, LVI_L0, LVI_R5, LVI_G0)); - d2l_write(tc->i2c, LV_MX0811, LV_MX(LVI_G1, LVI_G2, LVI_L0, LVI_L0)); - d2l_write(tc->i2c, LV_MX1215, LV_MX(LVI_G3, LVI_G4, LVI_G5, LVI_B0)); - d2l_write(tc->i2c, LV_MX1619, LV_MX(LVI_L0, LVI_L0, LVI_B1, LVI_B2)); - d2l_write(tc->i2c, LV_MX2023, LV_MX(LVI_B3, LVI_B4, LVI_B5, LVI_L0)); - d2l_write(tc->i2c, LV_MX2427, LV_MX(LVI_HS, LVI_VS, LVI_DE, LVI_L0)); + } else { + /* JEIDA-18 and JEIDA-24 */ + d2l_write(tc->i2c, LV_MX0003, LV_MX(LVI_R2, LVI_R3, LVI_R4, LVI_R5)); + d2l_write(tc->i2c, LV_MX0407, LV_MX(LVI_R6, LVI_R1, LVI_R7, LVI_G2)); + d2l_write(tc->i2c, LV_MX0811, LV_MX(LVI_G3, LVI_G4, LVI_G0, LVI_G1)); + d2l_write(tc->i2c, LV_MX1215, LV_MX(LVI_G5, LVI_G6, LVI_G7, LVI_B2)); + d2l_write(tc->i2c, LV_MX1619, LV_MX(LVI_B0, LVI_B1, LVI_B3, LVI_B4)); + d2l_write(tc->i2c, LV_MX2023, LV_MX(LVI_B5, LVI_B6, LVI_B7, LVI_L0)); + d2l_write(tc->i2c, LV_MX2427, LV_MX(LVI_HS, LVI_VS, LVI_DE, LVI_R0)); } d2l_write(tc->i2c, VFUEN, VFUEN_EN); @@ -605,10 +602,8 @@ static int tc_bridge_attach(struct drm_bridge *bridge, }; host = of_find_mipi_dsi_host_by_node(tc->host_node); - if (!host) { - dev_err(dev, "failed to find dsi host\n"); - return -EPROBE_DEFER; - } + if (!host) + return dev_err_probe(dev, -EPROBE_DEFER, "failed to find dsi host\n"); dsi = mipi_dsi_device_register_full(host, &info); if (IS_ERR(dsi)) { diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 447ea279e6..957b6dd075 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -775,6 +775,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int total_modes_count = 0; struct drm_client_offset *offsets; 
unsigned int connector_count = 0; + /* points to modes protected by mode_config.mutex */ struct drm_display_mode **modes; struct drm_crtc **crtcs; int i, ret = 0; @@ -843,7 +844,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, drm_client_pick_crtcs(client, connectors, connector_count, crtcs, modes, 0, width, height); } - mutex_unlock(&dev->mode_config.mutex); drm_client_modeset_release(client); @@ -873,6 +873,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, modeset->y = offset->y; } } + mutex_unlock(&dev->mode_config.mutex); mutex_unlock(&client->modeset_mutex); out: diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index cfe163103c..1140292820 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -2460,7 +2460,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, dev->mode_config.max_width, dev->mode_config.max_height); else - drm_dbg_kms(dev, "User-space requested a forced probe on [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe", + drm_dbg_kms(dev, "User-space requested a forced probe on [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe\n", connector->base.id, connector->name); } diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 5d30ba3af4..24606b6320 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -653,7 +653,7 @@ EXPORT_SYMBOL(mipi_dsi_set_maximum_return_packet_size); * * Return: 0 on success or a negative error code on failure. */ -ssize_t mipi_dsi_compression_mode(struct mipi_dsi_device *dsi, bool enable) +int mipi_dsi_compression_mode(struct mipi_dsi_device *dsi, bool enable) { /* Note: Needs updating for non-default PPS or algorithm */ u8 tx[2] = { enable << 0, 0 }; @@ -678,8 +678,8 @@ EXPORT_SYMBOL(mipi_dsi_compression_mode); * * Return: 0 on success or a negative error code on failure. 
*/ -ssize_t mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, - const struct drm_dsc_picture_parameter_set *pps) +int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, + const struct drm_dsc_picture_parameter_set *pps) { struct mipi_dsi_msg msg = { .channel = dsi->channel, diff --git a/drivers/gpu/drm/drm_modeset_helper.c b/drivers/gpu/drm/drm_modeset_helper.c index da483125e0..97071ff5e5 100644 --- a/drivers/gpu/drm/drm_modeset_helper.c +++ b/drivers/gpu/drm/drm_modeset_helper.c @@ -198,13 +198,22 @@ int drm_mode_config_helper_suspend(struct drm_device *dev) if (!dev) return 0; + /* + * Don't disable polling if it was never initialized + */ + if (dev->mode_config.poll_enabled) + drm_kms_helper_poll_disable(dev); - drm_kms_helper_poll_disable(dev); drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1); state = drm_atomic_helper_suspend(dev); if (IS_ERR(state)) { drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0); - drm_kms_helper_poll_enable(dev); + /* + * Don't enable polling if it was never initialized + */ + if (dev->mode_config.poll_enabled) + drm_kms_helper_poll_enable(dev); + return PTR_ERR(state); } @@ -244,7 +253,11 @@ int drm_mode_config_helper_resume(struct drm_device *dev) dev->mode_config.suspend_state = NULL; drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0); - drm_kms_helper_poll_enable(dev); + /* + * Don't enable polling if it is not initialized + */ + if (dev->mode_config.poll_enabled) + drm_kms_helper_poll_enable(dev); return ret; } diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index f634371c71..7fd3de89ed 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -207,19 +207,24 @@ EXPORT_SYMBOL(drm_panel_disable); * The modes probed from the panel are automatically added to the connector * that the panel is attached to. * - * Return: The number of modes available from the panel on success or a - * negative error code on failure. 
+ * Return: The number of modes available from the panel on success, or 0 on + * failure (no modes). */ int drm_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector) { if (!panel) - return -EINVAL; + return 0; - if (panel->funcs && panel->funcs->get_modes) - return panel->funcs->get_modes(panel, connector); + if (panel->funcs && panel->funcs->get_modes) { + int num; - return -EOPNOTSUPP; + num = panel->funcs->get_modes(panel, connector); + if (num > 0) + return num; + } + + return 0; } EXPORT_SYMBOL(drm_panel_get_modes); diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index f6b72e0368..e79bb93072 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -235,6 +235,9 @@ drm_connector_mode_valid(struct drm_connector *connector, * Drivers can call this helper from their device resume implementation. It is * not an error to call this even when output polling isn't enabled. * + * If device polling was never initialized before, this call will trigger a + * warning and return. + * * Note that calls to enable and disable polling must be strictly ordered, which * is automatically the case when they're only call from suspend/resume * callbacks. @@ -246,7 +249,8 @@ void drm_kms_helper_poll_enable(struct drm_device *dev) struct drm_connector_list_iter conn_iter; unsigned long delay = DRM_OUTPUT_POLL_PERIOD; - if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll) + if (drm_WARN_ON_ONCE(dev, !dev->mode_config.poll_enabled) || + !drm_kms_helper_poll || dev->mode_config.poll_running) return; drm_connector_list_iter_begin(dev, &conn_iter); @@ -494,7 +498,8 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, } /* Re-enable polling in case the global poll config changed. 
*/ - if (drm_kms_helper_poll != dev->mode_config.poll_running) + if (dev->mode_config.poll_enabled && + (drm_kms_helper_poll != dev->mode_config.poll_running)) drm_kms_helper_poll_enable(dev); dev->mode_config.poll_running = drm_kms_helper_poll; @@ -742,14 +747,18 @@ EXPORT_SYMBOL(drm_kms_helper_is_poll_worker); * not an error to call this even when output polling isn't enabled or already * disabled. Polling is re-enabled by calling drm_kms_helper_poll_enable(). * + * If however, the polling was never initialized, this call will trigger a + * warning and return + * * Note that calls to enable and disable polling must be strictly ordered, which * is automatically the case when they're only call from suspend/resume * callbacks. */ void drm_kms_helper_poll_disable(struct drm_device *dev) { - if (!dev->mode_config.poll_enabled) + if (drm_WARN_ON(dev, !dev->mode_config.poll_enabled)) return; + cancel_delayed_work_sync(&dev->mode_config.output_poll_work); } EXPORT_SYMBOL(drm_kms_helper_poll_disable); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 7dcc639279..c45e5158fd 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -488,7 +488,7 @@ static const struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 3, + .minor = 4, }; /* diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index f2fc645c79..212e7050c4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -135,6 +135,9 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) { struct etnaviv_chip_identity *ident = &gpu->identity; + const u32 product_id = ident->product_id; + const u32 customer_id = ident->customer_id; + const u32 eco_id = ident->eco_id; int i; for (i = 0; i < 
ARRAY_SIZE(etnaviv_chip_identities); i++) { @@ -148,6 +151,12 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu) etnaviv_chip_identities[i].eco_id == ~0U)) { memcpy(ident, &etnaviv_chip_identities[i], sizeof(*ident)); + + /* Restore some id values as ~0U aka 'don't care' might been used. */ + ident->product_id = product_id; + ident->customer_id = customer_id; + ident->eco_id = eco_id; + return true; } } diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index e96436e11a..e1ffe8a28b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -315,14 +315,14 @@ static int vidi_get_modes(struct drm_connector *connector) */ if (!ctx->raw_edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "raw_edid is null.\n"); - return -EFAULT; + return 0; } edid_len = (1 + ctx->raw_edid->extensions) * EDID_LENGTH; edid = kmemdup(ctx->raw_edid, edid_len, GFP_KERNEL); if (!edid) { DRM_DEV_DEBUG_KMS(ctx->dev, "failed to allocate edid\n"); - return -ENOMEM; + return 0; } drm_connector_update_edid_property(connector, edid); diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 42b6b00140..35be9f0245 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector) int ret; if (!hdata->ddc_adpt) - return -ENODEV; + return 0; edid = drm_get_edid(connector, hdata->ddc_adpt); if (!edid) - return -ENODEV; + return 0; hdata->dvi_mode = !drm_detect_hdmi_monitor(edid); DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n", diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 745ffa7572..75defafb79 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2000,7 +2000,7 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state) 
&new_cdclk_state->actual)) return; - if (pipe == INVALID_PIPE || + if (new_cdclk_state->disable_pipes || old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2029,7 +2029,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) &new_cdclk_state->actual)) return; - if (pipe != INVALID_PIPE && + if (!new_cdclk_state->disable_pipes && old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2456,6 +2456,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa return NULL; cdclk_state->pipe = INVALID_PIPE; + cdclk_state->disable_pipes = false; return &cdclk_state->base; } @@ -2575,6 +2576,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) if (ret) return ret; + new_cdclk_state->disable_pipes = true; + drm_dbg_kms(&dev_priv->drm, "Modeset required for cdclk change\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index b34eb00fb3..42376b5b3f 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -52,6 +52,9 @@ struct intel_cdclk_state { /* bitmask of active pipes */ u8 active_pipes; + + /* update cdclk with pipes disabled */ + bool disable_pipes; }; int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 8ea0fa665e..1eb2395ea2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -374,6 +374,9 @@ i915_gem_userptr_release(struct drm_i915_gem_object *obj) { GEM_WARN_ON(obj->userptr.page_ref); + if (!obj->userptr.notifier.mm) + return; + mmu_interval_notifier_remove(&obj->userptr.notifier); obj->userptr.notifier.mm = NULL; } diff --git 
a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index dacd627737..5a90969067 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -274,9 +274,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 773ff51218..eac55083c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3188,6 +3188,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. 
*/ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 63ba2ad846..5423ea4c1e 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -70,14 +70,14 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) int ret; if (!mode) - return -EINVAL; + return 0; ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); if (ret) { drm_mode_destroy(connector->dev, mode); - return ret; + return 0; } drm_mode_copy(mode, &imxpd->mode); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index b983adffa3..88bdb8eeba 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -33,6 +33,9 @@ static struct mtk_drm_gem_obj *mtk_drm_gem_init(struct drm_device *dev, size = round_up(size, PAGE_SIZE); + if (size == 0) + return ERR_PTR(-EINVAL); + mtk_gem_obj = kzalloc(sizeof(*mtk_gem_obj), GFP_KERNEL); if (!mtk_gem_obj) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index 5cd2b2ebbb..2c8e978eb9 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -105,6 +105,8 @@ #define HHI_HDMI_CLK_CNTL 0x1cc /* 0x73 */ #define HHI_HDMI_PHY_CNTL0 0x3a0 /* 0xe8 */ #define HHI_HDMI_PHY_CNTL1 0x3a4 /* 0xe9 */ +#define PHY_CNTL1_INIT 0x03900000 +#define PHY_INVERT BIT(17) #define HHI_HDMI_PHY_CNTL2 0x3a8 /* 0xea */ #define HHI_HDMI_PHY_CNTL3 0x3ac /* 0xeb */ #define HHI_HDMI_PHY_CNTL4 0x3b0 /* 0xec */ @@ -129,6 +131,8 @@ struct meson_dw_hdmi_data { unsigned int addr); void (*dwc_write)(struct meson_dw_hdmi *dw_hdmi, unsigned int addr, unsigned int data); + u32 cntl0_init; + u32 cntl1_init; }; struct meson_dw_hdmi { @@ -384,26 +388,6 @@ static int 
dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data, drm_mode_is_420_also(display, mode))) mode_is_420 = true; - /* Enable clocks */ - regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL, 0xffff, 0x100); - - /* Bring HDMITX MEM output of power down */ - regmap_update_bits(priv->hhi, HHI_MEM_PD_REG0, 0xff << 8, 0); - - /* Bring out of reset */ - dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_SW_RESET, 0); - - /* Enable internal pixclk, tmds_clk, spdif_clk, i2s_clk, cecclk */ - dw_hdmi_top_write_bits(dw_hdmi, HDMITX_TOP_CLK_CNTL, - 0x3, 0x3); - - /* Enable cec_clk and hdcp22_tmdsclk_en */ - dw_hdmi_top_write_bits(dw_hdmi, HDMITX_TOP_CLK_CNTL, - 0x3 << 4, 0x3 << 4); - - /* Enable normal output to PHY */ - dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_BIST_CNTL, BIT(12)); - /* TMDS pattern setup */ if (mode->clock > 340000 && !mode_is_420) { dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_01, @@ -425,20 +409,6 @@ static int dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data, /* Setup PHY parameters */ meson_hdmi_phy_setup_mode(dw_hdmi, mode, mode_is_420); - /* Setup PHY */ - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - 0xffff << 16, 0x0390 << 16); - - /* BIT_INVERT */ - if (dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxl-dw-hdmi") || - dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxm-dw-hdmi") || - dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-g12a-dw-hdmi")) - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - BIT(17), 0); - else - regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, - BIT(17), BIT(17)); - /* Disable clock, fifo, fifo_wr */ regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, 0xf, 0); @@ -492,7 +462,9 @@ static void dw_hdmi_phy_disable(struct dw_hdmi *hdmi, DRM_DEBUG_DRIVER("\n"); - regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0); + /* Fallback to init mode */ + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL1, dw_hdmi->data->cntl1_init); + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, dw_hdmi->data->cntl0_init); } static enum drm_connector_status 
dw_hdmi_read_hpd(struct dw_hdmi *hdmi, @@ -610,11 +582,22 @@ static const struct regmap_config meson_dw_hdmi_regmap_config = { .fast_io = true, }; -static const struct meson_dw_hdmi_data meson_dw_hdmi_gx_data = { +static const struct meson_dw_hdmi_data meson_dw_hdmi_gxbb_data = { .top_read = dw_hdmi_top_read, .top_write = dw_hdmi_top_write, .dwc_read = dw_hdmi_dwc_read, .dwc_write = dw_hdmi_dwc_write, + .cntl0_init = 0x0, + .cntl1_init = PHY_CNTL1_INIT | PHY_INVERT, +}; + +static const struct meson_dw_hdmi_data meson_dw_hdmi_gxl_data = { + .top_read = dw_hdmi_top_read, + .top_write = dw_hdmi_top_write, + .dwc_read = dw_hdmi_dwc_read, + .dwc_write = dw_hdmi_dwc_write, + .cntl0_init = 0x0, + .cntl1_init = PHY_CNTL1_INIT, }; static const struct meson_dw_hdmi_data meson_dw_hdmi_g12a_data = { @@ -622,6 +605,8 @@ static const struct meson_dw_hdmi_data meson_dw_hdmi_g12a_data = { .top_write = dw_hdmi_g12a_top_write, .dwc_read = dw_hdmi_g12a_dwc_read, .dwc_write = dw_hdmi_g12a_dwc_write, + .cntl0_init = 0x000b4242, /* Bandgap */ + .cntl1_init = PHY_CNTL1_INIT, }; static void meson_dw_hdmi_init(struct meson_dw_hdmi *meson_dw_hdmi) @@ -656,6 +641,13 @@ static void meson_dw_hdmi_init(struct meson_dw_hdmi *meson_dw_hdmi) meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_CLK_CNTL, 0xff); + /* Enable normal output to PHY */ + meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_BIST_CNTL, BIT(12)); + + /* Setup PHY */ + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL1, meson_dw_hdmi->data->cntl1_init); + regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, meson_dw_hdmi->data->cntl0_init); + /* Enable HDMI-TX Interrupt */ meson_dw_hdmi->data->top_write(meson_dw_hdmi, HDMITX_TOP_INTR_STAT_CLR, HDMITX_TOP_INTR_CORE); @@ -883,11 +875,11 @@ static const struct dev_pm_ops meson_dw_hdmi_pm_ops = { static const struct of_device_id meson_dw_hdmi_of_table[] = { { .compatible = "amlogic,meson-gxbb-dw-hdmi", - .data = &meson_dw_hdmi_gx_data }, + .data = &meson_dw_hdmi_gxbb_data }, { 
.compatible = "amlogic,meson-gxl-dw-hdmi", - .data = &meson_dw_hdmi_gx_data }, + .data = &meson_dw_hdmi_gxl_data }, { .compatible = "amlogic,meson-gxm-dw-hdmi", - .data = &meson_dw_hdmi_gx_data }, + .data = &meson_dw_hdmi_gxl_data }, { .compatible = "amlogic,meson-g12a-dw-hdmi", .data = &meson_dw_hdmi_g12a_data }, { } diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a82119eb5..2a942dc6a6 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -790,13 +790,13 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, FREQ_1000_1001(params[i].pixel_freq)); DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/10)*10); + FREQ_1000_1001(params[i].phy_freq/1000)*1000); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) && + if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -1070,7 +1070,7 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) && + phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && (vclk_freq == params[freq].vclk_freq || vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index aa01698d6b..a05276f0d6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -441,9 +441,6 @@ static void dpu_encoder_phys_cmd_enable_helper( 
_dpu_encoder_phys_cmd_pingpong_config(phys_enc); - if (!dpu_encoder_phys_cmd_is_master(phys_enc)) - return; - ctl = phys_enc->hw_ctl; ctl->ops.update_pending_flush_intf(ctl, phys_enc->intf_idx); } diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 8d0612caf6..c563ecf6e7 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -501,8 +501,8 @@ int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host) unsigned long byte_intf_rate; int ret; - DBG("Set clk rates: pclk=%d, byteclk=%lu", - msm_host->mode->clock, msm_host->byte_clk_rate); + DBG("Set clk rates: pclk=%lu, byteclk=%lu", + msm_host->pixel_clk_rate, msm_host->byte_clk_rate); ret = dev_pm_opp_set_rate(&msm_host->pdev->dev, msm_host->byte_clk_rate); @@ -583,9 +583,9 @@ int dsi_link_clk_set_rate_v2(struct msm_dsi_host *msm_host) { int ret; - DBG("Set clk rates: pclk=%d, byteclk=%lu, esc_clk=%lu, dsi_src_clk=%lu", - msm_host->mode->clock, msm_host->byte_clk_rate, - msm_host->esc_clk_rate, msm_host->src_clk_rate); + DBG("Set clk rates: pclk=%lu, byteclk=%lu, esc_clk=%lu, dsi_src_clk=%lu", + msm_host->pixel_clk_rate, msm_host->byte_clk_rate, + msm_host->esc_clk_rate, msm_host->src_clk_rate); ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate); if (ret) { diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index e8c445eb11..f63ceb8d3e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -23,6 +23,7 @@ */ #include "nouveau_drv.h" +#include "nouveau_bios.h" #include "nouveau_reg.h" #include "dispnv04/hw.h" #include "nouveau_encoder.h" @@ -1675,7 +1676,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf) */ if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) { if (*conn == 0xf2005014 && *conf == 0xffffffff) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 
DCB_OUTPUT_B); return false; } } @@ -1761,26 +1762,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios) #ifdef __powerpc__ /* Apple iMac G4 NV17 */ if (of_machine_is_compatible("PowerMac4,5")) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1); - fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B); + fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C); return; } #endif /* Make up some sane defaults */ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, - bios->legacy.i2c_indices.crt, 1, 1); + bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B); if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0) fabricate_dcb_output(dcb, DCB_OUTPUT_TV, bios->legacy.i2c_indices.tv, - all_heads, 0); + all_heads, DCB_OUTPUT_A); else if (bios->tmds.output0_script_ptr || bios->tmds.output1_script_ptr) fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, bios->legacy.i2c_indices.panel, - all_heads, 1); + all_heads, DCB_OUTPUT_B); } static int diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index 447b7594b3..0107a21dc9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -109,12 +109,15 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector, u8 *dpcd = nv_encoder->dp.dpcd; int ret = NOUVEAU_DP_NONE; - /* If we've already read the DPCD on an eDP device, we don't need to - * reread it as it won't change + /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we + * haven't probed them once before. 
*/ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP && - dpcd[DP_DPCD_REV] != 0) - return NOUVEAU_DP_SST; + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if (connector->status == connector_status_connected) + return NOUVEAU_DP_SST; + else if (connector->status == connector_status_disconnected) + return NOUVEAU_DP_NONE; + } mutex_lock(&nv_encoder->dp.hpd_irq_lock); if (mstm) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c index 4bf486b571..cb05f7f48a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c @@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name) return ERR_PTR(-EINVAL); } +static void of_fini(void *p) +{ + kfree(p); +} + const struct nvbios_source nvbios_of = { .name = "OpenFirmware", .init = of_init, - .fini = (void(*)(void *))kfree, + .fini = of_fini, .read = of_read, .size = of_size, .rw = false, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index c51bac7617..9fe5b6a36a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -221,8 +221,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory) void __iomem *map = NULL; /* Already mapped? */ - if (refcount_inc_not_zero(&iobj->maps)) + if (refcount_inc_not_zero(&iobj->maps)) { + /* read barrier match the wmb on refcount set */ + smp_rmb(); return iobj->map; + } /* Take the lock, and re-check that another thread hasn't * already mapped the object in the meantime. 
@@ -249,6 +252,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; + /* barrier to ensure the ptrs are written before refcount is set */ + smp_wmb(); refcount_set(&iobj->maps, 1); } diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index e1542451ef..0d89779de2 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -420,7 +420,7 @@ static int ili9341_dpi_prepare(struct drm_panel *panel) ili9341_dpi_init(ili); - return ret; + return 0; } static int ili9341_dpi_enable(struct drm_panel *panel) @@ -716,18 +716,18 @@ static int ili9341_probe(struct spi_device *spi) reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset)) - dev_err(dev, "Failed to get gpio 'reset'\n"); + return dev_err_probe(dev, PTR_ERR(reset), "Failed to get gpio 'reset'\n"); dc = devm_gpiod_get_optional(dev, "dc", GPIOD_OUT_LOW); if (IS_ERR(dc)) - dev_err(dev, "Failed to get gpio 'dc'\n"); + return dev_err_probe(dev, PTR_ERR(dc), "Failed to get gpio 'dc'\n"); if (!strcmp(id->name, "sf-tc240t-9370-t")) return ili9341_dpi_probe(spi, dc, reset); else if (!strcmp(id->name, "yx240qv29")) return ili9341_dbi_probe(spi, dc, reset); - return -1; + return -ENODEV; } static int ili9341_remove(struct spi_device *spi) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 671bd1d1ad..0dc4d891fe 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2613,6 +2613,9 @@ static const struct panel_desc innolux_g121x1_l03 = { .unprepare = 200, .disable = 400, }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, }; static const struct drm_display_mode innolux_n116bca_ea1_mode = { diff --git 
a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c index eb43503ec9..6134432e49 100644 --- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c +++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c @@ -261,8 +261,6 @@ static int visionox_rm69299_remove(struct mipi_dsi_device *dsi) struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi); mipi_dsi_detach(ctx->dsi); - mipi_dsi_device_unregister(ctx->dsi); - drm_panel_remove(&ctx->panel); return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 7a8353d7ab..86d77794d8 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -222,7 +222,7 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) edid = drm_get_edid(connector, vc4_hdmi->ddc); cec_s_phys_addr_from_edid(vc4_hdmi->cec_adap, edid); if (!edid) - return -ENODEV; + return 0; vc4_encoder->hdmi_monitor = drm_detect_hdmi_monitor(edid); @@ -1506,6 +1506,8 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) index = 1; addr = of_get_address(dev->of_node, index, NULL, NULL); + if (!addr) + return -EINVAL; vc4_hdmi->audio.dma_data.addr = be32_to_cpup(addr) + mai_data->offset; vc4_hdmi->audio.dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index b91f8d1740..21134c7f18 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -472,7 +472,7 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, vmw_res_type(ctx) == vmw_res_dx_context) { for (i = 0; i < cotable_max; ++i) { res = vmw_context_cotable(ctx, i); - if (IS_ERR(res)) + if (IS_ERR_OR_NULL(res)) continue; ret = vmw_execbuf_res_noctx_val_add(sw_context, res, @@ -1277,6 +1277,8 @@ static int vmw_cmd_dx_define_query(struct vmw_private *dev_priv, return -EINVAL; cotable_res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXQUERY); 
+ if (IS_ERR_OR_NULL(cotable_res)) + return cotable_res ? PTR_ERR(cotable_res) : -EINVAL; ret = vmw_cotable_notify(cotable_res, cmd->body.queryId); return ret; @@ -2455,6 +2457,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, return ret; res = vmw_context_cotable(ctx_node->ctx, vmw_view_cotables[view_type]); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->defined_id); if (unlikely(ret != 0)) return ret; @@ -2540,8 +2544,8 @@ static int vmw_cmd_dx_so_define(struct vmw_private *dev_priv, so_type = vmw_so_cmd_to_type(header->id); res = vmw_context_cotable(ctx_node->ctx, vmw_so_cotables[so_type]); - if (IS_ERR(res)) - return PTR_ERR(res); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cotable_notify(res, cmd->defined_id); @@ -2660,6 +2664,8 @@ static int vmw_cmd_dx_define_shader(struct vmw_private *dev_priv, return -EINVAL; res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_DXSHADER); + if (IS_ERR_OR_NULL(res)) + return res ? PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.shaderId); if (ret) return ret; @@ -2981,6 +2987,8 @@ static int vmw_cmd_dx_define_streamoutput(struct vmw_private *dev_priv, } res = vmw_context_cotable(ctx_node->ctx, SVGA_COTABLE_STREAMOUTPUT); + if (IS_ERR_OR_NULL(res)) + return res ? 
PTR_ERR(res) : -EINVAL; ret = vmw_cotable_notify(res, cmd->body.soid); if (ret) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index b32ddbb992..50eba25456 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1068,7 +1068,7 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv, } event->event.base.type = DRM_VMW_EVENT_FENCE_SIGNALED; - event->event.base.length = sizeof(*event); + event->event.base.length = sizeof(event->event); event->event.user_data = user_data; ret = drm_event_reserve_init(dev, file_priv, &event->base, &event->event.base); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 01d5a01af2..e2a52b5de1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -537,6 +537,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct vmw_private *vmw = vmw_priv(crtc->dev); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc); @@ -544,9 +545,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, bool has_primary = new_state->plane_mask & drm_plane_mask(crtc->primary); - /* We always want to have an active plane with an active CRTC */ - if (has_primary != new_state->enable) - return -EINVAL; + /* + * This is fine in general, but broken userspace might expect + * some actual rendering so give a clue as why it's blank. 
+ */ + if (new_state->enable && !has_primary) + drm_dbg_driver(&vmw->drm, + "CRTC without a primary plane will be blank.\n"); if (new_state->connector_mask != connector_mask && diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 8c8ee87fd3..23c2dc943c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -246,10 +246,10 @@ struct vmw_framebuffer_bo { static const uint32_t __maybe_unused vmw_primary_plane_formats[] = { - DRM_FORMAT_XRGB1555, - DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, }; static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = { diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 218e3718fd..96737ddc81 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -367,11 +367,6 @@ static int host1x_device_uevent(struct device *dev, return 0; } -static int host1x_dma_configure(struct device *dev) -{ - return of_dma_configure(dev, dev->of_node, true); -} - static const struct dev_pm_ops host1x_device_pm_ops = { .suspend = pm_generic_suspend, .resume = pm_generic_resume, @@ -385,7 +380,6 @@ struct bus_type host1x_bus_type = { .name = "host1x", .match = host1x_device_match, .uevent = host1x_device_uevent, - .dma_configure = host1x_dma_configure, .pm = &host1x_device_pm_ops, }; @@ -474,8 +468,6 @@ static int host1x_device_add(struct host1x *host1x, device->dev.bus = &host1x_bus_type; device->dev.parent = host1x->dev; - of_dma_configure(&device->dev, host1x->dev->of_node, true); - device->dev.dma_parms = &device->dma_parms; dma_set_max_seg_size(&device->dev, UINT_MAX); diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 08768e5acc..57697605b2 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -965,9 +965,7 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, } break; case REPORT_TYPE_MOUSE: - 
workitem->reports_supported |= STD_MOUSE | HIDPP; - if (djrcv_dev->type == recvr_type_mouse_only) - workitem->reports_supported |= MULTIMEDIA; + workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA; break; } } diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 7c61bb9291..f8c56810d2 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -51,7 +51,6 @@ /* flags */ #define I2C_HID_STARTED 0 #define I2C_HID_RESET_PENDING 1 -#define I2C_HID_READ_PENDING 2 #define I2C_HID_PWR_ON 0x00 #define I2C_HID_PWR_SLEEP 0x01 @@ -251,7 +250,6 @@ static int __i2c_hid_command(struct i2c_client *client, msg[1].len = data_len; msg[1].buf = buf_recv; msg_num = 2; - set_bit(I2C_HID_READ_PENDING, &ihid->flags); } if (wait) @@ -259,9 +257,6 @@ static int __i2c_hid_command(struct i2c_client *client, ret = i2c_transfer(client->adapter, msg, msg_num); - if (data_len > 0) - clear_bit(I2C_HID_READ_PENDING, &ihid->flags); - if (ret != msg_num) return ret < 0 ? 
ret : -EIO; @@ -533,9 +528,6 @@ static irqreturn_t i2c_hid_irq(int irq, void *dev_id) { struct i2c_hid *ihid = dev_id; - if (test_bit(I2C_HID_READ_PENDING, &ihid->flags)) - return IRQ_HANDLED; - i2c_hid_get_input(ihid); return IRQ_HANDLED; diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index 6c942dd1ab..ba45605fc6 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -944,6 +944,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) if (!dev) return NULL; + dev->devc = &pdev->dev; ishtp_device_init(dev); init_waitqueue_head(&dev->wait_hw_ready); @@ -979,7 +980,6 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) } dev->ops = &ish_hw_ops; - dev->devc = &pdev->dev; dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr); return dev; } diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 5916ef2933..bbc3ea3458 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -212,6 +212,11 @@ static int ish_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* request and enable interrupt */ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + dev_err(dev, "ISH: Failed to allocate IRQ vectors\n"); + return ret; + } + if (!pdev->msi_enabled && !pdev->msix_enabled) irq_flag = IRQF_SHARED; diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index ba0ca652b9..09da654d2b 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -84,7 +84,7 @@ static void uhid_device_add_worker(struct work_struct *work) * However, we do have to clear the ->running flag and do a * wakeup to make sure userspace knows that the device is gone. 
*/ - uhid->running = false; + WRITE_ONCE(uhid->running, false); wake_up_interruptible(&uhid->report_wait); } } @@ -194,9 +194,9 @@ static int __uhid_report_queue_and_wait(struct uhid_device *uhid, spin_unlock_irqrestore(&uhid->qlock, flags); ret = wait_event_interruptible_timeout(uhid->report_wait, - !uhid->report_running || !uhid->running, + !uhid->report_running || !READ_ONCE(uhid->running), 5 * HZ); - if (!ret || !uhid->running || uhid->report_running) + if (!ret || !READ_ONCE(uhid->running) || uhid->report_running) ret = -EIO; else if (ret < 0) ret = -ERESTARTSYS; @@ -237,7 +237,7 @@ static int uhid_hid_get_report(struct hid_device *hid, unsigned char rnum, struct uhid_event *ev; int ret; - if (!uhid->running) + if (!READ_ONCE(uhid->running)) return -EIO; ev = kzalloc(sizeof(*ev), GFP_KERNEL); @@ -279,7 +279,7 @@ static int uhid_hid_set_report(struct hid_device *hid, unsigned char rnum, struct uhid_event *ev; int ret; - if (!uhid->running || count > UHID_DATA_MAX) + if (!READ_ONCE(uhid->running) || count > UHID_DATA_MAX) return -EIO; ev = kzalloc(sizeof(*ev), GFP_KERNEL); @@ -580,7 +580,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid) if (!uhid->hid) return -EINVAL; - uhid->running = false; + WRITE_ONCE(uhid->running, false); wake_up_interruptible(&uhid->report_wait); cancel_work_sync(&uhid->worker); @@ -594,7 +594,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid) static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev) { - if (!uhid->running) + if (!READ_ONCE(uhid->running)) return -EINVAL; hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input.data, @@ -605,7 +605,7 @@ static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev) static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev) { - if (!uhid->running) + if (!READ_ONCE(uhid->running)) return -EINVAL; hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input2.data, @@ -617,7 +617,7 @@ static int uhid_dev_input2(struct uhid_device 
*uhid, struct uhid_event *ev) static int uhid_dev_get_report_reply(struct uhid_device *uhid, struct uhid_event *ev) { - if (!uhid->running) + if (!READ_ONCE(uhid->running)) return -EINVAL; uhid_report_wake_up(uhid, ev->u.get_report_reply.id, ev); @@ -627,7 +627,7 @@ static int uhid_dev_get_report_reply(struct uhid_device *uhid, static int uhid_dev_set_report_reply(struct uhid_device *uhid, struct uhid_event *ev) { - if (!uhid->running) + if (!READ_ONCE(uhid->running)) return -EINVAL; uhid_report_wake_up(uhid, ev->u.set_report_reply.id, ev); diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index 6b1ce2242c..60dfdb0f55 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -934,10 +934,21 @@ static const struct i2c_device_id amc6821_id[] = { MODULE_DEVICE_TABLE(i2c, amc6821_id); +static const struct of_device_id __maybe_unused amc6821_of_match[] = { + { + .compatible = "ti,amc6821", + .data = (void *)amc6821, + }, + { } +}; + +MODULE_DEVICE_TABLE(of, amc6821_of_match); + static struct i2c_driver amc6821_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "amc6821", + .of_match_table = of_match_ptr(amc6821_of_match), }, .probe_new = amc6821_probe, .id_table = amc6821_id, diff --git a/drivers/hwmon/corsair-cpro.c b/drivers/hwmon/corsair-cpro.c index fa6aa4fc8b..486fb6a8c3 100644 --- a/drivers/hwmon/corsair-cpro.c +++ b/drivers/hwmon/corsair-cpro.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #define USB_VENDOR_ID_CORSAIR 0x1b1c @@ -77,8 +78,11 @@ struct ccp_device { struct hid_device *hdev; struct device *hwmon_dev; + /* For reinitializing the completion below */ + spinlock_t wait_input_report_lock; struct completion wait_input_report; struct mutex mutex; /* whenever buffer is used, lock before send_usb_cmd */ + u8 *cmd_buffer; u8 *buffer; int target[6]; DECLARE_BITMAP(temp_cnct, NUM_TEMP_SENSORS); @@ -111,15 +115,23 @@ static int send_usb_cmd(struct ccp_device *ccp, u8 command, u8 byte1, u8 byte2, unsigned long t; 
int ret; - memset(ccp->buffer, 0x00, OUT_BUFFER_SIZE); - ccp->buffer[0] = command; - ccp->buffer[1] = byte1; - ccp->buffer[2] = byte2; - ccp->buffer[3] = byte3; - + memset(ccp->cmd_buffer, 0x00, OUT_BUFFER_SIZE); + ccp->cmd_buffer[0] = command; + ccp->cmd_buffer[1] = byte1; + ccp->cmd_buffer[2] = byte2; + ccp->cmd_buffer[3] = byte3; + + /* + * Disable raw event parsing for a moment to safely reinitialize the + * completion. Reinit is done because hidraw could have triggered + * the raw event parsing and marked the ccp->wait_input_report + * completion as done. + */ + spin_lock_bh(&ccp->wait_input_report_lock); reinit_completion(&ccp->wait_input_report); + spin_unlock_bh(&ccp->wait_input_report_lock); - ret = hid_hw_output_report(ccp->hdev, ccp->buffer, OUT_BUFFER_SIZE); + ret = hid_hw_output_report(ccp->hdev, ccp->cmd_buffer, OUT_BUFFER_SIZE); if (ret < 0) return ret; @@ -135,11 +147,12 @@ static int ccp_raw_event(struct hid_device *hdev, struct hid_report *report, u8 struct ccp_device *ccp = hid_get_drvdata(hdev); /* only copy buffer when requested */ - if (completion_done(&ccp->wait_input_report)) - return 0; - - memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size)); - complete(&ccp->wait_input_report); + spin_lock(&ccp->wait_input_report_lock); + if (!completion_done(&ccp->wait_input_report)) { + memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size)); + complete_all(&ccp->wait_input_report); + } + spin_unlock(&ccp->wait_input_report_lock); return 0; } @@ -492,7 +505,11 @@ static int ccp_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!ccp) return -ENOMEM; - ccp->buffer = devm_kmalloc(&hdev->dev, OUT_BUFFER_SIZE, GFP_KERNEL); + ccp->cmd_buffer = devm_kmalloc(&hdev->dev, OUT_BUFFER_SIZE, GFP_KERNEL); + if (!ccp->cmd_buffer) + return -ENOMEM; + + ccp->buffer = devm_kmalloc(&hdev->dev, IN_BUFFER_SIZE, GFP_KERNEL); if (!ccp->buffer) return -ENOMEM; @@ -510,7 +527,9 @@ static int ccp_probe(struct hid_device *hdev, const struct hid_device_id *id) 
ccp->hdev = hdev; hid_set_drvdata(hdev, ccp); + mutex_init(&ccp->mutex); + spin_lock_init(&ccp->wait_input_report_lock); init_completion(&ccp->wait_input_report); hid_device_io_start(hdev); diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 3daaf22378..d6dfa268f3 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -80,11 +80,11 @@ struct ucd9000_debugfs_entry { * It has been observed that the UCD90320 randomly fails register access when * doing another access right on the back of a register write. To mitigate this * make sure that there is a minimum delay between a write access and the - * following access. The 250us is based on experimental data. At a delay of - * 200us the issue seems to go away. Add a bit of extra margin to allow for + * following access. The 500 is based on experimental data. At a delay of + * 350us the issue seems to go away. Add a bit of extra margin to allow for * system to system differences. */ -#define UCD90320_WAIT_DELAY_US 250 +#define UCD90320_WAIT_DELAY_US 500 static inline void ucd90320_wait(const struct ucd9000_data *data) { diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c index 18546ebc8e..0365643029 100644 --- a/drivers/hwmon/shtc1.c +++ b/drivers/hwmon/shtc1.c @@ -238,7 +238,7 @@ static int shtc1_probe(struct i2c_client *client) if (np) { data->setup.blocking_io = of_property_read_bool(np, "sensirion,blocking-io"); - data->setup.high_precision = !of_property_read_bool(np, "sensicon,low-precision"); + data->setup.high_precision = !of_property_read_bool(np, "sensirion,low-precision"); } else { if (client->dev.platform_data) data->setup = *(struct shtc1_platform_data *)dev->platform_data; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 26d0d4485a..84734c7c19 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1048,41 
+1048,23 @@ static void etm4_init_arch_data(void *info) etmidr0 = etm4x_relaxed_read32(csa, TRCIDR0); /* INSTP0, bits[2:1] P0 tracing support field */ - if (BMVAL(etmidr0, 1, 1) && BMVAL(etmidr0, 2, 2)) - drvdata->instrp0 = true; - else - drvdata->instrp0 = false; - + drvdata->instrp0 = !!(FIELD_GET(TRCIDR0_INSTP0_MASK, etmidr0) == 0b11); /* TRCBB, bit[5] Branch broadcast tracing support bit */ - if (BMVAL(etmidr0, 5, 5)) - drvdata->trcbb = true; - else - drvdata->trcbb = false; - + drvdata->trcbb = !!(etmidr0 & TRCIDR0_TRCBB); /* TRCCOND, bit[6] Conditional instruction tracing support bit */ - if (BMVAL(etmidr0, 6, 6)) - drvdata->trccond = true; - else - drvdata->trccond = false; - + drvdata->trccond = !!(etmidr0 & TRCIDR0_TRCCOND); /* TRCCCI, bit[7] Cycle counting instruction bit */ - if (BMVAL(etmidr0, 7, 7)) - drvdata->trccci = true; - else - drvdata->trccci = false; - + drvdata->trccci = !!(etmidr0 & TRCIDR0_TRCCCI); /* RETSTACK, bit[9] Return stack bit */ - if (BMVAL(etmidr0, 9, 9)) - drvdata->retstack = true; - else - drvdata->retstack = false; - + drvdata->retstack = !!(etmidr0 & TRCIDR0_RETSTACK); /* NUMEVENT, bits[11:10] Number of events field */ - drvdata->nr_event = BMVAL(etmidr0, 10, 11); + drvdata->nr_event = FIELD_GET(TRCIDR0_NUMEVENT_MASK, etmidr0); /* QSUPP, bits[16:15] Q element support field */ - drvdata->q_support = BMVAL(etmidr0, 15, 16); + drvdata->q_support = FIELD_GET(TRCIDR0_QSUPP_MASK, etmidr0); + if (drvdata->q_support) + drvdata->q_filt = !!(etmidr0 & TRCIDR0_QFILT); /* TSSIZE, bits[28:24] Global timestamp size field */ - drvdata->ts_size = BMVAL(etmidr0, 24, 28); + drvdata->ts_size = FIELD_GET(TRCIDR0_TSSIZE_MASK, etmidr0); /* maximum size of resources */ etmidr2 = etm4x_relaxed_read32(csa, TRCIDR2); @@ -1602,16 +1584,14 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcccctlr = etm4x_read32(csa, TRCCCCTLR); state->trcbbctlr = etm4x_read32(csa, TRCBBCTLR); state->trctraceidr = etm4x_read32(csa, TRCTRACEIDR); - 
state->trcqctlr = etm4x_read32(csa, TRCQCTLR); + if (drvdata->q_filt) + state->trcqctlr = etm4x_read32(csa, TRCQCTLR); state->trcvictlr = etm4x_read32(csa, TRCVICTLR); state->trcviiectlr = etm4x_read32(csa, TRCVIIECTLR); state->trcvissctlr = etm4x_read32(csa, TRCVISSCTLR); if (drvdata->nr_pe_cmp) state->trcvipcssctlr = etm4x_read32(csa, TRCVIPCSSCTLR); - state->trcvdctlr = etm4x_read32(csa, TRCVDCTLR); - state->trcvdsacctlr = etm4x_read32(csa, TRCVDSACCTLR); - state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i)); @@ -1628,7 +1608,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i)); } - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* Resource selector pair 0 is reserved */ + for (i = 2; i < drvdata->nr_resource * 2; i++) state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i)); for (i = 0; i < drvdata->nr_ss_cmp; i++) { @@ -1697,8 +1678,10 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) { int i; struct etmv4_save_state *state = drvdata->save_state; - struct csdev_access tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base); - struct csdev_access *csa = &tmp_csa; + struct csdev_access *csa = &drvdata->csdev->access; + + if (WARN_ON(!drvdata->csdev)) + return; etm4_cs_unlock(drvdata, csa); etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET); @@ -1717,16 +1700,14 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR); etm4x_relaxed_write32(csa, state->trcbbctlr, TRCBBCTLR); etm4x_relaxed_write32(csa, state->trctraceidr, TRCTRACEIDR); - etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR); + if (drvdata->q_filt) + etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR); etm4x_relaxed_write32(csa, state->trcvictlr, TRCVICTLR); etm4x_relaxed_write32(csa, state->trcviiectlr, TRCVIIECTLR); etm4x_relaxed_write32(csa, state->trcvissctlr, 
TRCVISSCTLR); if (drvdata->nr_pe_cmp) etm4x_relaxed_write32(csa, state->trcvipcssctlr, TRCVIPCSSCTLR); - etm4x_relaxed_write32(csa, state->trcvdctlr, TRCVDCTLR); - etm4x_relaxed_write32(csa, state->trcvdsacctlr, TRCVDSACCTLR); - etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i)); @@ -1743,7 +1724,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i)); } - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* Resource selector pair 0 is reserved */ + for (i = 2; i < drvdata->nr_resource * 2; i++) etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i)); for (i = 0; i < drvdata->nr_ss_cmp; i++) { @@ -2022,6 +2004,9 @@ static int etm4_probe_platform_dev(struct platform_device *pdev) ret = etm4_probe(&pdev->dev, NULL, 0); pm_runtime_put(&pdev->dev); + if (ret) + pm_runtime_disable(&pdev->dev); + return ret; } diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index a0f3f0ba33..3ab528c6b9 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -43,9 +43,6 @@ #define TRCVIIECTLR 0x084 #define TRCVISSCTLR 0x088 #define TRCVIPCSSCTLR 0x08C -#define TRCVDCTLR 0x0A0 -#define TRCVDSACCTLR 0x0A4 -#define TRCVDARCCTLR 0x0A8 /* Derived resources registers */ #define TRCSEQEVRn(n) (0x100 + (n * 4)) /* n = 0-2 */ #define TRCSEQRSTEVR 0x118 @@ -90,9 +87,6 @@ /* Address Comparator registers n = 0-15 */ #define TRCACVRn(n) (0x400 + (n * 8)) #define TRCACATRn(n) (0x480 + (n * 8)) -/* Data Value Comparator Value registers, n = 0-7 */ -#define TRCDVCVRn(n) (0x500 + (n * 16)) -#define TRCDVCMRn(n) (0x580 + (n * 16)) /* ContextID/Virtual ContextID comparators, n = 0-7 */ #define TRCCIDCVRn(n) (0x600 + (n * 8)) #define TRCVMIDCVRn(n) (0x640 + (n * 8)) @@ -131,6 +125,20 @@ #define 
TRCRSR_TA BIT(12) +/* + * Bit positions of registers that are defined above, in the sysreg.h style + * of _MASK for multi bit fields and BIT() for single bits. + */ +#define TRCIDR0_INSTP0_MASK GENMASK(2, 1) +#define TRCIDR0_TRCBB BIT(5) +#define TRCIDR0_TRCCOND BIT(6) +#define TRCIDR0_TRCCCI BIT(7) +#define TRCIDR0_RETSTACK BIT(9) +#define TRCIDR0_NUMEVENT_MASK GENMASK(11, 10) +#define TRCIDR0_QFILT BIT(14) +#define TRCIDR0_QSUPP_MASK GENMASK(16, 15) +#define TRCIDR0_TSSIZE_MASK GENMASK(28, 24) + /* * System instructions to access ETM registers. * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions @@ -174,9 +182,6 @@ /* List of registers accessible via System instructions */ #define ETM4x_ONLY_SYSREG_LIST(op, val) \ CASE_##op((val), TRCPROCSELR) \ - CASE_##op((val), TRCVDCTLR) \ - CASE_##op((val), TRCVDSACCTLR) \ - CASE_##op((val), TRCVDARCCTLR) \ CASE_##op((val), TRCOSLAR) #define ETM_COMMON_SYSREG_LIST(op, val) \ @@ -324,22 +329,6 @@ CASE_##op((val), TRCACATRn(13)) \ CASE_##op((val), TRCACATRn(14)) \ CASE_##op((val), TRCACATRn(15)) \ - CASE_##op((val), TRCDVCVRn(0)) \ - CASE_##op((val), TRCDVCVRn(1)) \ - CASE_##op((val), TRCDVCVRn(2)) \ - CASE_##op((val), TRCDVCVRn(3)) \ - CASE_##op((val), TRCDVCVRn(4)) \ - CASE_##op((val), TRCDVCVRn(5)) \ - CASE_##op((val), TRCDVCVRn(6)) \ - CASE_##op((val), TRCDVCVRn(7)) \ - CASE_##op((val), TRCDVCMRn(0)) \ - CASE_##op((val), TRCDVCMRn(1)) \ - CASE_##op((val), TRCDVCMRn(2)) \ - CASE_##op((val), TRCDVCMRn(3)) \ - CASE_##op((val), TRCDVCMRn(4)) \ - CASE_##op((val), TRCDVCMRn(5)) \ - CASE_##op((val), TRCDVCMRn(6)) \ - CASE_##op((val), TRCDVCMRn(7)) \ CASE_##op((val), TRCCIDCVRn(0)) \ CASE_##op((val), TRCCIDCVRn(1)) \ CASE_##op((val), TRCCIDCVRn(2)) \ @@ -821,9 +810,6 @@ struct etmv4_save_state { u32 trcviiectlr; u32 trcvissctlr; u32 trcvipcssctlr; - u32 trcvdctlr; - u32 trcvdsacctlr; - u32 trcvdarcctlr; u32 trcseqevr[ETM_MAX_SEQ_STATES]; u32 trcseqrstevr; @@ -895,6 +881,7 @@ struct etmv4_save_state { * @os_unlock: 
True if access to management registers is allowed. * @instrp0: Tracing of load and store instructions * as P0 elements is supported. + * @q_filt: Q element filtering support, if Q elements are supported. * @trcbb: Indicates if the trace unit supports branch broadcast tracing. * @trccond: If the trace unit supports conditional * instruction tracing. @@ -953,6 +940,7 @@ struct etmv4_drvdata { bool boot_enable; bool os_unlock; bool instrp0; + bool q_filt; bool trcbb; bool trccond; bool retstack; diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 147d338c19..648893f9e4 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -289,6 +289,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Meteor Lake-S CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xae24), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Raptor Lake-S */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26), diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 2712e699ba..ae9ea3a1fa 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -868,8 +868,11 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data, return -ENOMEM; stm->major = register_chrdev(0, stm_data->name, &stm_fops); - if (stm->major < 0) - goto err_free; + if (stm->major < 0) { + err = stm->major; + vfree(stm); + return err; + } device_initialize(&stm->dev); stm->dev.devt = MKDEV(stm->major, 0); @@ -913,10 +916,8 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data, err_device: unregister_chrdev(stm->major, stm_data->name); - /* matches device_initialize() above */ + /* calls stm_device_release() */ put_device(&stm->dev); -err_free: - vfree(stm); return err; } diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 87c2c7c847..7844fba281 100644 --- 
a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1416,7 +1416,6 @@ static int i801_add_mux(struct i801_priv *priv) mux_config->gpios[i], "mux", 0); } gpiod_add_lookup_table(lookup); - priv->lookup = lookup; /* * Register the mux device, we use PLATFORM_DEVID_NONE here @@ -1430,7 +1429,10 @@ static int i801_add_mux(struct i801_priv *priv) sizeof(struct i2c_mux_gpio_platform_data)); if (IS_ERR(priv->mux_pdev)) { gpiod_remove_lookup_table(lookup); + devm_kfree(dev, lookup); dev_err(dev, "Failed to register i2c-mux-gpio device\n"); + } else { + priv->lookup = lookup; } return PTR_ERR_OR_ZERO(priv->mux_pdev); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 8fb065caf3..1810a994c0 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2072,13 +2072,18 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, * Returns negative errno, else the number of messages executed. * * Adapter lock must be held when calling this function. No debug logging - * takes place. adap->algo->master_xfer existence isn't checked. + * takes place. 
*/ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { unsigned long orig_jiffies; int ret, try; + if (!adap->algo->master_xfer) { + dev_dbg(&adap->dev, "I2C level transfers not supported\n"); + return -EOPNOTSUPP; + } + if (WARN_ON(!msgs || num < 1)) return -EINVAL; @@ -2145,11 +2150,6 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { int ret; - if (!adap->algo->master_xfer) { - dev_dbg(&adap->dev, "I2C level transfers not supported\n"); - return -EOPNOTSUPP; - } - /* REVISIT the fault reporting model here is weak: * * - When we get an error after receiving N bytes from a slave, diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 15a412e88d..7fc82b003b 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -356,6 +356,19 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) int ret; mutex_lock(&master->lock); + /* + * IBIWON may be set before SVC_I3C_MCTRL_REQUEST_AUTO_IBI, causing + * readl_relaxed_poll_timeout() to return immediately. Consequently, + * ibitype will be 0 since it was last updated only after the 8th SCL + * cycle, leading to missed client IBI handlers. + * + * A typical scenario is when IBIWON occurs and bus arbitration is lost + * at svc_i3c_master_priv_xfers(). + * + * Clear SVC_I3C_MINT_IBIWON before sending SVC_I3C_MCTRL_REQUEST_AUTO_IBI. 
+ */ + writel(SVC_I3C_MINT_IBIWON, master->regs + SVC_I3C_MSTATUS); + /* Acknowledge the incoming interrupt with the AUTOIBI mechanism */ writel(SVC_I3C_MCTRL_REQUEST_AUTO_IBI | SVC_I3C_MCTRL_IBIRESP_AUTO, @@ -370,9 +383,6 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) goto reenable_ibis; } - /* Clear the interrupt status */ - writel(SVC_I3C_MINT_IBIWON, master->regs + SVC_I3C_MSTATUS); - status = readl(master->regs + SVC_I3C_MSTATUS); ibitype = SVC_I3C_MSTATUS_IBITYPE(status); ibiaddr = SVC_I3C_MSTATUS_IBIADDR(status); diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index df600d2917..ffae30e5eb 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -27,9 +27,13 @@ #define MXC4005_REG_ZOUT_UPPER 0x07 #define MXC4005_REG_ZOUT_LOWER 0x08 +#define MXC4005_REG_INT_MASK0 0x0A + #define MXC4005_REG_INT_MASK1 0x0B #define MXC4005_REG_INT_MASK1_BIT_DRDYE 0x01 +#define MXC4005_REG_INT_CLR0 0x00 + #define MXC4005_REG_INT_CLR1 0x01 #define MXC4005_REG_INT_CLR1_BIT_DRDYC 0x01 @@ -113,7 +117,9 @@ static bool mxc4005_is_readable_reg(struct device *dev, unsigned int reg) static bool mxc4005_is_writeable_reg(struct device *dev, unsigned int reg) { switch (reg) { + case MXC4005_REG_INT_CLR0: case MXC4005_REG_INT_CLR1: + case MXC4005_REG_INT_MASK0: case MXC4005_REG_INT_MASK1: case MXC4005_REG_CONTROL: return true; @@ -330,17 +336,13 @@ static int mxc4005_set_trigger_state(struct iio_trigger *trig, { struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); struct mxc4005_data *data = iio_priv(indio_dev); + unsigned int val; int ret; mutex_lock(&data->mutex); - if (state) { - ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, - MXC4005_REG_INT_MASK1_BIT_DRDYE); - } else { - ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, - ~MXC4005_REG_INT_MASK1_BIT_DRDYE); - } + val = state ? 
MXC4005_REG_INT_MASK1_BIT_DRDYE : 0; + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, val); if (ret < 0) { mutex_unlock(&data->mutex); dev_err(data->dev, "failed to update reg_int_mask1"); @@ -382,6 +384,14 @@ static int mxc4005_chip_init(struct mxc4005_data *data) dev_dbg(data->dev, "MXC4005 chip id %02x\n", reg); + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK0, 0); + if (ret < 0) + return dev_err_probe(data->dev, ret, "writing INT_MASK0\n"); + + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, 0); + if (ret < 0) + return dev_err_probe(data->dev, ret, "writing INT_MASK1\n"); + return 0; } diff --git a/drivers/iio/imu/adis16475.c b/drivers/iio/imu/adis16475.c index a3b9745dd1..e8238459bd 100644 --- a/drivers/iio/imu/adis16475.c +++ b/drivers/iio/imu/adis16475.c @@ -1145,6 +1145,7 @@ static int adis16475_config_sync_mode(struct adis16475 *st) struct device *dev = &st->adis.spi->dev; const struct adis16475_sync *sync; u32 sync_mode; + u16 val; /* default to internal clk */ st->clk_freq = st->info->int_clk * 1000; @@ -1214,8 +1215,9 @@ static int adis16475_config_sync_mode(struct adis16475 *st) * I'm keeping this for simplicity and avoiding extra variables * in chip_info. 
*/ + val = ADIS16475_SYNC_MODE(sync->sync_mode); ret = __adis_update_bits(&st->adis, ADIS16475_REG_MSG_CTRL, - ADIS16475_SYNC_MODE_MASK, sync->sync_mode); + ADIS16475_SYNC_MODE_MASK, val); if (ret) return ret; diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c index 1b6b9530f1..7fdc7a0147 100644 --- a/drivers/iio/pressure/dps310.c +++ b/drivers/iio/pressure/dps310.c @@ -730,7 +730,7 @@ static int dps310_read_pressure(struct dps310_data *data, int *val, int *val2, } } -static int dps310_calculate_temp(struct dps310_data *data) +static int dps310_calculate_temp(struct dps310_data *data, int *val) { s64 c0; s64 t; @@ -746,7 +746,9 @@ static int dps310_calculate_temp(struct dps310_data *data) t = c0 + ((s64)data->temp_raw * (s64)data->c1); /* Convert to milliCelsius and scale the temperature */ - return (int)div_s64(t * 1000LL, kt); + *val = (int)div_s64(t * 1000LL, kt); + + return 0; } static int dps310_read_temp(struct dps310_data *data, int *val, int *val2, @@ -768,11 +770,10 @@ static int dps310_read_temp(struct dps310_data *data, int *val, int *val2, if (rc) return rc; - rc = dps310_calculate_temp(data); - if (rc < 0) + rc = dps310_calculate_temp(data, val); + if (rc) return rc; - *val = rc; return IIO_VAL_INT; case IIO_CHAN_INFO_OVERSAMPLING_RATIO: diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 680c3ac8cd..c8a7fe5fbc 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -34,6 +34,7 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); +#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */ static const char * const ibcm_rej_reason_strs[] = { [IB_CM_REJ_NO_QP] = "no QP", [IB_CM_REJ_NO_EEC] = "no EEC", @@ -1032,13 +1033,26 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } +static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id, + enum ib_cm_state old_state) +{ + struct cm_id_private *cm_id_priv; + + 
cm_id_priv = container_of(cm_id, struct cm_id_private, id); + pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__, + cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount)); +} + static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; + enum ib_cm_state old_state; struct cm_work *work; + int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irq(&cm_id_priv->lock); + old_state = cm_id->state; retest: switch (cm_id->state) { case IB_CM_LISTEN: @@ -1142,7 +1156,14 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id)); cm_deref_id(cm_id_priv); - wait_for_completion(&cm_id_priv->comp); + do { + ret = wait_for_completion_timeout(&cm_id_priv->comp, + msecs_to_jiffies( + CM_DESTROY_ID_WAIT_TIMEOUT)); + if (!ret) /* timeout happened */ + cm_destroy_id_wait_timeout(cm_id, old_state); + } while (!ret); + while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h index e9d282679e..944d907124 100644 --- a/drivers/infiniband/core/cm_trace.h +++ b/drivers/infiniband/core/cm_trace.h @@ -16,7 +16,7 @@ #include #include -#include +#include /* * enum ib_cm_state, from include/rdma/ib_cm.h diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index e45264267b..47f3c6e4be 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -15,7 +15,7 @@ #define _TRACE_RDMA_CMA_H #include -#include +#include DECLARE_EVENT_CLASS(cma_fsm_class, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 2d84a6b3f0..fa84ce3307 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -61,16 +61,16 @@ enum { (sizeof(struct scatterlist) + sizeof(void *))) #define 
check_whether_bt_num_3(type, hop_num) \ - (type < HEM_TYPE_MTT && hop_num == 2) + ((type) < HEM_TYPE_MTT && (hop_num) == 2) #define check_whether_bt_num_2(type, hop_num) \ - ((type < HEM_TYPE_MTT && hop_num == 1) || \ - (type >= HEM_TYPE_MTT && hop_num == 2)) + (((type) < HEM_TYPE_MTT && (hop_num) == 1) || \ + ((type) >= HEM_TYPE_MTT && (hop_num) == 2)) #define check_whether_bt_num_1(type, hop_num) \ - ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \ - (type >= HEM_TYPE_MTT && hop_num == 1) || \ - (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0)) + (((type) < HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0) || \ + ((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \ + ((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0)) struct hns_roce_hem_chunk { struct list_head list; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7376f012ec..4accc9efa6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2202,7 +2202,7 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); - caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_num = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); } else { u32 func_num = max_t(u32, 1, hr_dev->func_num); @@ -3514,8 +3514,9 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, wc->status == IB_WC_WR_FLUSH_ERR)) return; - ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, + ibdev_err_ratelimited(&hr_dev->ib_dev, "error cqe status 0x%x:\n", + cqe_status); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 4, cqe, cq->cqe_size, false); wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 
80b9a9a45c..e2d2f8f2bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -38,6 +38,7 @@ #include #include #include +#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 12c482f4a1..7106e51d5f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -444,18 +444,18 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_mtr *mtr = &mr->pbl_mtr; - int ret = 0; + int ret, sg_num = 0; mr->npages = 0; mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count, sizeof(dma_addr_t), GFP_KERNEL); if (!mr->page_list) - return ret; + return sg_num; - ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); - if (ret < 1) { + sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); + if (sg_num < 1) { ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", - mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); + mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num); goto err_page_list; } @@ -466,17 +466,16 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages); if (ret) { ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); - ret = 0; + sg_num = 0; } else { mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); - ret = mr->npages; } err_page_list: kvfree(mr->page_list); mr->page_list = NULL; - return ret; + return sg_num; } static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index e64ef6903f..35001fb99b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c 
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -100,7 +100,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) goto err_out; } - ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); + ret = xa_err(xa_store_irq(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); if (ret) { ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); goto err_put; @@ -132,7 +132,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) err_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); err_xa: - xa_erase(&srq_table->xa, srq->srqn); + xa_erase_irq(&srq_table->xa, srq->srqn); err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); err_out: @@ -151,7 +151,7 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); - xa_erase(&srq_table->xa, srq->srqn); + xa_erase_irq(&srq_table->xa, srq->srqn); if (refcount_dec_and_test(&srq->refcount)) complete(&srq->free); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index f6f2df855c..1082841807 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -166,7 +166,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, mdev = dev->mdev; mdev_port_num = 1; } - if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) { + if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 && + !mlx5_core_mp_enabled(mdev)) { /* set local port to one for Function-Per-Port HCA. 
*/ mdev = dev->mdev; mdev_port_num = 1; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index cf203f879d..191078b6e9 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1687,7 +1687,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, unsigned int diffs = current_access_flags ^ target_access_flags; if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) + IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING | + IB_ACCESS_REMOTE_ATOMIC)) return false; return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, target_access_flags); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a0c5f3bdc3..8665e50640 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -441,6 +441,7 @@ static int remove_device_files(struct super_block *sb, return PTR_ERR(dir); } simple_recursive_removal(dir, NULL); + dput(dir); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 8e0f9c489c..f6ef782ce7 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -35,6 +35,8 @@ void rxe_dealloc(struct ib_device *ib_dev) if (rxe->tfm) crypto_free_shash(rxe->tfm); + + mutex_destroy(&rxe->usdev_lock); } /* initialize rxe device parameters */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 0322dc7539..323d5d5db2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -185,8 +185,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ppriv = ipoib_priv(pdev); - snprintf(intf_name, sizeof(intf_name), "%s.%04x", - ppriv->dev->name, pkey); + /* If you increase IFNAMSIZ, update snprintf below + * to allow longer names. 
+ */ + BUILD_BUG_ON(IFNAMSIZ != 16); + snprintf(intf_name, sizeof(intf_name), "%.10s.%04x", ppriv->dev->name, + pkey); ndev = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (IS_ERR(ndev)) { diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 6f38aa23a1..b3215c97ee 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -42,8 +42,8 @@ struct ims_pcu_backlight { #define IMS_PCU_PART_NUMBER_LEN 15 #define IMS_PCU_SERIAL_NUMBER_LEN 8 #define IMS_PCU_DOM_LEN 8 -#define IMS_PCU_FW_VERSION_LEN (9 + 1) -#define IMS_PCU_BL_VERSION_LEN (9 + 1) +#define IMS_PCU_FW_VERSION_LEN 16 +#define IMS_PCU_BL_VERSION_LEN 16 #define IMS_PCU_BL_RESET_REASON_LEN (2 + 1) #define IMS_PCU_PCU_B_DEVICE_ID 5 diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c index 53ad25eaf1..8bfe5c7b12 100644 --- a/drivers/input/misc/pm8xxx-vibrator.c +++ b/drivers/input/misc/pm8xxx-vibrator.c @@ -14,7 +14,8 @@ #define VIB_MAX_LEVEL_mV (3100) #define VIB_MIN_LEVEL_mV (1200) -#define VIB_MAX_LEVELS (VIB_MAX_LEVEL_mV - VIB_MIN_LEVEL_mV) +#define VIB_PER_STEP_mV (100) +#define VIB_MAX_LEVELS (VIB_MAX_LEVEL_mV - VIB_MIN_LEVEL_mV + VIB_PER_STEP_mV) #define MAX_FF_SPEED 0xff @@ -118,10 +119,10 @@ static void pm8xxx_work_handler(struct work_struct *work) vib->active = true; vib->level = ((VIB_MAX_LEVELS * vib->speed) / MAX_FF_SPEED) + VIB_MIN_LEVEL_mV; - vib->level /= 100; + vib->level /= VIB_PER_STEP_mV; } else { vib->active = false; - vib->level = VIB_MIN_LEVEL_mV / 100; + vib->level = VIB_MIN_LEVEL_mV / VIB_PER_STEP_mV; } pm8xxx_vib_set(vib, vib->active); diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c index 77cc653edc..e401934df4 100644 --- a/drivers/input/mouse/cyapa.c +++ b/drivers/input/mouse/cyapa.c @@ -1357,10 +1357,16 @@ static int __maybe_unused cyapa_suspend(struct device *dev) u8 power_mode; int error; - error = mutex_lock_interruptible(&cyapa->state_sync_lock); + error = 
mutex_lock_interruptible(&cyapa->input->mutex); if (error) return error; + error = mutex_lock_interruptible(&cyapa->state_sync_lock); + if (error) { + mutex_unlock(&cyapa->input->mutex); + return error; + } + /* * Runtime PM is enable only when device is in operational mode and * users in use, so need check it before disable it to @@ -1395,6 +1401,8 @@ static int __maybe_unused cyapa_suspend(struct device *dev) cyapa->irq_wake = (enable_irq_wake(client->irq) == 0); mutex_unlock(&cyapa->state_sync_lock); + mutex_unlock(&cyapa->input->mutex); + return 0; } @@ -1404,6 +1412,7 @@ static int __maybe_unused cyapa_resume(struct device *dev) struct cyapa *cyapa = i2c_get_clientdata(client); int error; + mutex_lock(&cyapa->input->mutex); mutex_lock(&cyapa->state_sync_lock); if (device_may_wakeup(dev) && cyapa->irq_wake) { @@ -1422,6 +1431,7 @@ static int __maybe_unused cyapa_resume(struct device *dev) enable_irq(client->irq); mutex_unlock(&cyapa->state_sync_lock); + mutex_unlock(&cyapa->input->mutex); return 0; } diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 258d5fe3d3..aa32371f04 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -1196,7 +1196,11 @@ static int rmi_driver_probe(struct device *dev) } rmi_driver_set_input_params(rmi_dev, data->input); data->input->phys = devm_kasprintf(dev, GFP_KERNEL, - "%s/input0", dev_name(dev)); + "%s/input0", dev_name(dev)); + if (!data->input->phys) { + retval = -ENOMEM; + goto err; + } } retval = rmi_init_functions(data); diff --git a/drivers/input/serio/ioc3kbd.c b/drivers/input/serio/ioc3kbd.c index d51bfe912d..676b0bda3d 100644 --- a/drivers/input/serio/ioc3kbd.c +++ b/drivers/input/serio/ioc3kbd.c @@ -190,7 +190,7 @@ static int ioc3kbd_probe(struct platform_device *pdev) return 0; } -static int ioc3kbd_remove(struct platform_device *pdev) +static void ioc3kbd_remove(struct platform_device *pdev) { struct ioc3kbd_data *d = platform_get_drvdata(pdev); @@ 
-198,13 +198,18 @@ static int ioc3kbd_remove(struct platform_device *pdev) serio_unregister_port(d->kbd); serio_unregister_port(d->aux); - - return 0; } +static const struct platform_device_id ioc3kbd_id_table[] = { + { "ioc3-kbd", }, + { } +}; +MODULE_DEVICE_TABLE(platform, ioc3kbd_id_table); + static struct platform_driver ioc3kbd_driver = { .probe = ioc3kbd_probe, - .remove = ioc3kbd_remove, + .remove_new = ioc3kbd_remove, + .id_table = ioc3kbd_id_table, .driver = { .name = "ioc3-kbd", }, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8cd63e6ccd..aa47d955de 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1291,6 +1291,19 @@ static unsigned long iommu_dma_get_merge_boundary(struct device *dev) return (1UL << __ffs(domain->pgsize_bitmap)) - 1; } +static size_t iommu_dma_opt_mapping_size(void) +{ + return iova_rcache_range(); +} + +static size_t iommu_dma_max_mapping_size(struct device *dev) +{ + if (dev_is_untrusted(dev)) + return swiotlb_max_mapping_size(dev); + + return SIZE_MAX; +} + static const struct dma_map_ops iommu_dma_ops = { .alloc = iommu_dma_alloc, .free = iommu_dma_free, @@ -1313,6 +1326,8 @@ static const struct dma_map_ops iommu_dma_ops = { .map_resource = iommu_dma_map_resource, .unmap_resource = iommu_dma_unmap_resource, .get_merge_boundary = iommu_dma_get_merge_boundary, + .opt_mapping_size = iommu_dma_opt_mapping_size, + .max_mapping_size = iommu_dma_max_mapping_size, }; /* diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 3a9468b1d2..a96c9a15c9 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -88,7 +88,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) struct page *pages; int irq, ret; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); diff --git a/drivers/iommu/iova.c 
b/drivers/iommu/iova.c index 0835f32e04..f6dfb9e45e 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -27,6 +27,11 @@ static void free_iova_rcaches(struct iova_domain *iovad); static void fq_destroy_all_entries(struct iova_domain *iovad); static void fq_flush_timeout(struct timer_list *t); +unsigned long iova_rcache_range(void) +{ + return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1); +} + static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) { struct iova_domain *iovad; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2ae46fa6b3..04ac40d11f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1101,6 +1101,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, {} }; +MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids); static struct platform_driver mtk_iommu_driver = { .probe = mtk_iommu_probe, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index fe1c3123a7..02668fd340 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -576,6 +576,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2701-m4u", }, {} }; +MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids); static const struct component_master_ops mtk_iommu_com_ops = { .bind = mtk_iommu_bind, diff --git a/drivers/irqchip/irq-alpine-msi.c b/drivers/irqchip/irq-alpine-msi.c index fc1ef7de37..c9ffd69dfc 100644 --- a/drivers/irqchip/irq-alpine-msi.c +++ b/drivers/irqchip/irq-alpine-msi.c @@ -165,7 +165,7 @@ static int alpine_msix_middle_domain_alloc(struct irq_domain *domain, return 0; err_sgi: - irq_domain_free_irqs_parent(domain, virq, i - 1); + irq_domain_free_irqs_parent(domain, virq, i); alpine_msix_free_sgi(priv, sgi, nr_irqs); return err; } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 78eeb382c2..fa89e590c1 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ 
b/drivers/irqchip/irq-gic-v3-its.c @@ -4530,13 +4530,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq set_bit(i, bitmap); } - if (err) { - if (i > 0) - its_vpe_irq_domain_free(domain, virq, i); - - its_lpi_free(bitmap, base, nr_ids); - its_free_prop_table(vprop_page); - } + if (err) + its_vpe_irq_domain_free(domain, virq, i); return err; } diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index 32562b7e68..254a58fbb8 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -132,7 +132,7 @@ static int pch_msi_middle_domain_alloc(struct irq_domain *domain, err_hwirq: pch_msi_free_hwirq(priv, hwirq, nr_irqs); - irq_domain_free_irqs_parent(domain, virq, i - 1); + irq_domain_free_irqs_parent(domain, virq, i); return err; } diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c index db9270da5b..b6ddf1d47c 100644 --- a/drivers/macintosh/via-macii.c +++ b/drivers/macintosh/via-macii.c @@ -140,24 +140,19 @@ static int macii_probe(void) /* Initialize the driver */ static int macii_init(void) { - unsigned long flags; int err; - local_irq_save(flags); - err = macii_init_via(); if (err) - goto out; + return err; err = request_irq(IRQ_MAC_ADB, macii_interrupt, 0, "ADB", macii_interrupt); if (err) - goto out; + return err; macii_state = idle; -out: - local_irq_restore(flags); - return err; + return 0; } /* initialize the hardware */ diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index df743650d8..ae372bc44f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4083,7 +4083,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { log2_sectors_per_bitmap_bit = !llval ? 
0 : __ilog2_u64(llval); } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { - if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; goto bad; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 04769fb20c..5d1006142a 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -4049,7 +4049,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index dcf34c6b05..d3716d5c45 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -686,8 +686,10 @@ static void dm_exception_table_exit(struct dm_exception_table *et, for (i = 0; i < size; i++) { slot = et->table + i; - hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) + hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { kmem_cache_free(mem, ex); + cond_resched(); + } } kvfree(et->table); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 49c46f3aea..b26e22dd9b 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1355,7 +1355,7 @@ __acquires(bitmap->lock) sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; - sector_t csize; + sector_t csize = ((sector_t)1) << bitmap->chunkshift; int err; if (page >= bitmap->pages) { @@ -1364,6 +1364,7 @@ __acquires(bitmap->lock) * End-of-device while looking for a whole page or * user set a huge number to sysfs bitmap_set_bits. 
*/ + *blocks = csize - (offset & (csize - 1)); return NULL; } err = md_bitmap_checkpage(bitmap, page, create, 0); @@ -1372,8 +1373,7 @@ __acquires(bitmap->lock) bitmap->bp[page].map == NULL) csize = ((sector_t)1) << (bitmap->chunkshift + PAGE_COUNTER_SHIFT); - else - csize = ((sector_t)1) << bitmap->chunkshift; + *blocks = csize - (offset & (csize - 1)); if (err < 0) diff --git a/drivers/md/md.c b/drivers/md/md.c index c937ad7f6b..45ef1ddd2b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2526,6 +2526,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) fail: pr_warn("md: failed to register dev-%s for %s\n", b, mdname(mddev)); + mddev_destroy_serial_pool(mddev, rdev, false); return err; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c2a42486f9..bcd43cca94 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -36,7 +36,6 @@ */ #include -#include #include #include #include @@ -6486,6 +6485,9 @@ static void raid5d(struct md_thread *thread) int batch_size, released; unsigned int offset; + if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) + break; + released = release_stripe_list(conf, conf->temp_inactive_list); if (released) clear_bit(R5_DID_ALLOC, &conf->cache_state); @@ -6522,18 +6524,7 @@ static void raid5d(struct md_thread *thread) spin_unlock_irq(&conf->device_lock); md_check_recovery(mddev); spin_lock_irq(&conf->device_lock); - - /* - * Waiting on MD_SB_CHANGE_PENDING below may deadlock - * seeing md_check_recovery() is needed to clear - * the flag when using mdmon. 
- */ - continue; } - - wait_event_lock_irq(mddev->sb_wait, - !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), - conf->device_lock); } pr_debug("%d stripes handled\n", handled); diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 99ede1417d..1f8ac656ae 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -39,15 +39,6 @@ static void cec_fill_msg_report_features(struct cec_adapter *adap, */ #define CEC_XFER_TIMEOUT_MS (5 * 400 + 100) -#define call_op(adap, op, arg...) \ - (adap->ops->op ? adap->ops->op(adap, ## arg) : 0) - -#define call_void_op(adap, op, arg...) \ - do { \ - if (adap->ops->op) \ - adap->ops->op(adap, ## arg); \ - } while (0) - static int cec_log_addr2idx(const struct cec_adapter *adap, u8 log_addr) { int i; @@ -366,38 +357,48 @@ static void cec_data_completed(struct cec_data *data) /* * A pending CEC transmit needs to be cancelled, either because the CEC * adapter is disabled or the transmit takes an impossibly long time to - * finish. + * finish, or the reply timed out. * * This function is called with adap->lock held. */ -static void cec_data_cancel(struct cec_data *data, u8 tx_status) +static void cec_data_cancel(struct cec_data *data, u8 tx_status, u8 rx_status) { + struct cec_adapter *adap = data->adap; + /* * It's either the current transmit, or it is a pending * transmit. Take the appropriate action to clear it. 
*/ - if (data->adap->transmitting == data) { - data->adap->transmitting = NULL; + if (adap->transmitting == data) { + adap->transmitting = NULL; } else { list_del_init(&data->list); if (!(data->msg.tx_status & CEC_TX_STATUS_OK)) - if (!WARN_ON(!data->adap->transmit_queue_sz)) - data->adap->transmit_queue_sz--; + if (!WARN_ON(!adap->transmit_queue_sz)) + adap->transmit_queue_sz--; } if (data->msg.tx_status & CEC_TX_STATUS_OK) { data->msg.rx_ts = ktime_get_ns(); - data->msg.rx_status = CEC_RX_STATUS_ABORTED; + data->msg.rx_status = rx_status; + if (!data->blocking) + data->msg.tx_status = 0; } else { data->msg.tx_ts = ktime_get_ns(); data->msg.tx_status |= tx_status | CEC_TX_STATUS_MAX_RETRIES; data->msg.tx_error_cnt++; data->attempts = 0; + if (!data->blocking) + data->msg.rx_status = 0; } /* Queue transmitted message for monitoring purposes */ - cec_queue_msg_monitor(data->adap, &data->msg, 1); + cec_queue_msg_monitor(adap, &data->msg, 1); + + if (!data->blocking && data->msg.sequence) + /* Allow drivers to react to a canceled transmit */ + call_void_op(adap, adap_nb_transmit_canceled, &data->msg); cec_data_completed(data); } @@ -418,15 +419,15 @@ static void cec_flush(struct cec_adapter *adap) while (!list_empty(&adap->transmit_queue)) { data = list_first_entry(&adap->transmit_queue, struct cec_data, list); - cec_data_cancel(data, CEC_TX_STATUS_ABORTED); + cec_data_cancel(data, CEC_TX_STATUS_ABORTED, 0); } if (adap->transmitting) - cec_data_cancel(adap->transmitting, CEC_TX_STATUS_ABORTED); + adap->transmit_in_progress_aborted = true; /* Cancel the pending timeout work. 
*/ list_for_each_entry_safe(data, n, &adap->wait_queue, list) { if (cancel_delayed_work(&data->work)) - cec_data_cancel(data, CEC_TX_STATUS_OK); + cec_data_cancel(data, CEC_TX_STATUS_OK, CEC_RX_STATUS_ABORTED); /* * If cancel_delayed_work returned false, then * the cec_wait_timeout function is running, @@ -501,6 +502,15 @@ int cec_thread_func(void *_adap) goto unlock; } + if (adap->transmit_in_progress && + adap->transmit_in_progress_aborted) { + if (adap->transmitting) + cec_data_cancel(adap->transmitting, + CEC_TX_STATUS_ABORTED, 0); + adap->transmit_in_progress = false; + adap->transmit_in_progress_aborted = false; + goto unlock; + } if (adap->transmit_in_progress && timeout) { /* * If we timeout, then log that. Normally this does @@ -516,7 +526,7 @@ int cec_thread_func(void *_adap) adap->transmitting->msg.msg); /* Just give up on this. */ cec_data_cancel(adap->transmitting, - CEC_TX_STATUS_TIMEOUT); + CEC_TX_STATUS_TIMEOUT, 0); } else { pr_warn("cec-%s: transmit timed out\n", adap->name); } @@ -572,10 +582,11 @@ int cec_thread_func(void *_adap) if (data->attempts == 0) data->attempts = attempts; + adap->transmit_in_progress_aborted = false; /* Tell the adapter to transmit, cancel on error */ - if (adap->ops->adap_transmit(adap, data->attempts, - signal_free_time, &data->msg)) - cec_data_cancel(data, CEC_TX_STATUS_ABORTED); + if (call_op(adap, adap_transmit, data->attempts, + signal_free_time, &data->msg)) + cec_data_cancel(data, CEC_TX_STATUS_ABORTED, 0); else adap->transmit_in_progress = true; @@ -599,6 +610,8 @@ void cec_transmit_done_ts(struct cec_adapter *adap, u8 status, struct cec_msg *msg; unsigned int attempts_made = arb_lost_cnt + nack_cnt + low_drive_cnt + error_cnt; + bool done = status & (CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_OK); + bool aborted = adap->transmit_in_progress_aborted; dprintk(2, "%s: status 0x%02x\n", __func__, status); if (attempts_made < 1) @@ -619,6 +632,7 @@ void cec_transmit_done_ts(struct cec_adapter *adap, u8 status, goto 
wake_thread; } adap->transmit_in_progress = false; + adap->transmit_in_progress_aborted = false; msg = &data->msg; @@ -639,8 +653,7 @@ void cec_transmit_done_ts(struct cec_adapter *adap, u8 status, * the hardware didn't signal that it retried itself (by setting * CEC_TX_STATUS_MAX_RETRIES), then we will retry ourselves. */ - if (data->attempts > attempts_made && - !(status & (CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_OK))) { + if (!aborted && data->attempts > attempts_made && !done) { /* Retry this message */ data->attempts -= attempts_made; if (msg->timeout) @@ -655,6 +668,8 @@ void cec_transmit_done_ts(struct cec_adapter *adap, u8 status, goto wake_thread; } + if (aborted && !done) + status |= CEC_TX_STATUS_ABORTED; data->attempts = 0; /* Always set CEC_TX_STATUS_MAX_RETRIES on error */ @@ -733,9 +748,7 @@ static void cec_wait_timeout(struct work_struct *work) /* Mark the message as timed out */ list_del_init(&data->list); - data->msg.rx_ts = ktime_get_ns(); - data->msg.rx_status = CEC_RX_STATUS_TIMEOUT; - cec_data_completed(data); + cec_data_cancel(data, CEC_TX_STATUS_OK, CEC_RX_STATUS_TIMEOUT); unlock: mutex_unlock(&adap->lock); } @@ -751,6 +764,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, { struct cec_data *data; bool is_raw = msg_is_raw(msg); + int err; if (adap->devnode.unregistered) return -ENODEV; @@ -913,14 +927,20 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, * Release the lock and wait, retake the lock afterwards. 
*/ mutex_unlock(&adap->lock); - wait_for_completion_killable(&data->c); - if (!data->completed) - cancel_delayed_work_sync(&data->work); + err = wait_for_completion_killable(&data->c); + cancel_delayed_work_sync(&data->work); mutex_lock(&adap->lock); + if (err) + adap->transmit_in_progress_aborted = true; + /* Cancel the transmit if it was interrupted */ - if (!data->completed) - cec_data_cancel(data, CEC_TX_STATUS_ABORTED); + if (!data->completed) { + if (data->msg.tx_status & CEC_TX_STATUS_OK) + cec_data_cancel(data, CEC_TX_STATUS_OK, CEC_RX_STATUS_ABORTED); + else + cec_data_cancel(data, CEC_TX_STATUS_ABORTED, 0); + } /* The transmit completed (possibly with an error) */ *msg = data->msg; @@ -1117,20 +1137,6 @@ void cec_received_msg_ts(struct cec_adapter *adap, if (valid_la && min_len) { /* These messages have special length requirements */ switch (cmd) { - case CEC_MSG_TIMER_STATUS: - if (msg->msg[2] & 0x10) { - switch (msg->msg[2] & 0xf) { - case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE: - case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE: - if (msg->len < 5) - valid_la = false; - break; - } - } else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) { - if (msg->len < 5) - valid_la = false; - } - break; case CEC_MSG_RECORD_ON: switch (msg->msg[2]) { case CEC_OP_RECORD_SRC_OWN: @@ -1309,7 +1315,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, * Message not acknowledged, so this logical * address is free to use. 
*/ - err = adap->ops->adap_log_addr(adap, log_addr); + err = call_op(adap, adap_log_addr, log_addr); if (err) return err; @@ -1326,9 +1332,8 @@ static int cec_config_log_addr(struct cec_adapter *adap, */ static void cec_adap_unconfigure(struct cec_adapter *adap) { - if (!adap->needs_hpd || - adap->phys_addr != CEC_PHYS_ADDR_INVALID) - WARN_ON(adap->ops->adap_log_addr(adap, CEC_LOG_ADDR_INVALID)); + if (!adap->needs_hpd || adap->phys_addr != CEC_PHYS_ADDR_INVALID) + WARN_ON(call_op(adap, adap_log_addr, CEC_LOG_ADDR_INVALID)); adap->log_addrs.log_addr_mask = 0; adap->is_configured = false; cec_flush(adap); @@ -1536,9 +1541,12 @@ static int cec_config_thread_func(void *arg) */ static void cec_claim_log_addrs(struct cec_adapter *adap, bool block) { - if (WARN_ON(adap->is_configuring || adap->is_configured)) + if (WARN_ON(adap->is_claiming_log_addrs || + adap->is_configuring || adap->is_configured)) return; + adap->is_claiming_log_addrs = true; + init_completion(&adap->config_completion); /* Ready to kick off the thread */ @@ -1547,11 +1555,67 @@ static void cec_claim_log_addrs(struct cec_adapter *adap, bool block) "ceccfg-%s", adap->name); if (IS_ERR(adap->kthread_config)) { adap->kthread_config = NULL; + adap->is_configuring = false; } else if (block) { mutex_unlock(&adap->lock); wait_for_completion(&adap->config_completion); mutex_lock(&adap->lock); } + adap->is_claiming_log_addrs = false; +} + +/* + * Helper function to enable/disable the CEC adapter. + * + * This function is called with adap->lock held. 
+ */ +static int cec_adap_enable(struct cec_adapter *adap) +{ + bool enable; + int ret = 0; + + enable = adap->monitor_all_cnt || adap->monitor_pin_cnt || + adap->log_addrs.num_log_addrs; + if (adap->needs_hpd) + enable = enable && adap->phys_addr != CEC_PHYS_ADDR_INVALID; + + if (enable == adap->is_enabled) + return 0; + + /* serialize adap_enable */ + mutex_lock(&adap->devnode.lock); + if (enable) { + adap->last_initiator = 0xff; + adap->transmit_in_progress = false; + ret = adap->ops->adap_enable(adap, true); + if (!ret) { + /* + * Enable monitor-all/pin modes if needed. We warn, but + * continue if this fails as this is not a critical error. + */ + if (adap->monitor_all_cnt) + WARN_ON(call_op(adap, adap_monitor_all_enable, true)); + if (adap->monitor_pin_cnt) + WARN_ON(call_op(adap, adap_monitor_pin_enable, true)); + } + } else { + /* Disable monitor-all/pin modes if needed (needs_hpd == 1) */ + if (adap->monitor_all_cnt) + WARN_ON(call_op(adap, adap_monitor_all_enable, false)); + if (adap->monitor_pin_cnt) + WARN_ON(call_op(adap, adap_monitor_pin_enable, false)); + WARN_ON(adap->ops->adap_enable(adap, false)); + adap->last_initiator = 0xff; + adap->transmit_in_progress = false; + adap->transmit_in_progress_aborted = false; + if (adap->transmitting) + cec_data_cancel(adap->transmitting, CEC_TX_STATUS_ABORTED, 0); + } + if (!ret) + adap->is_enabled = enable; + wake_up_interruptible(&adap->kthread_waitq); + mutex_unlock(&adap->devnode.lock); + return ret; } /* Set a new physical address and send an event notifying userspace of this. 
@@ -1560,55 +1624,30 @@ static void cec_claim_log_addrs(struct cec_adapter *adap, bool block) */ void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block) { + bool becomes_invalid = phys_addr == CEC_PHYS_ADDR_INVALID; + bool is_invalid = adap->phys_addr == CEC_PHYS_ADDR_INVALID; + if (phys_addr == adap->phys_addr) return; - if (phys_addr != CEC_PHYS_ADDR_INVALID && adap->devnode.unregistered) + if (!becomes_invalid && adap->devnode.unregistered) return; dprintk(1, "new physical address %x.%x.%x.%x\n", cec_phys_addr_exp(phys_addr)); - if (phys_addr == CEC_PHYS_ADDR_INVALID || - adap->phys_addr != CEC_PHYS_ADDR_INVALID) { + if (becomes_invalid || !is_invalid) { adap->phys_addr = CEC_PHYS_ADDR_INVALID; cec_post_state_event(adap); cec_adap_unconfigure(adap); - /* Disabling monitor all mode should always succeed */ - if (adap->monitor_all_cnt) - WARN_ON(call_op(adap, adap_monitor_all_enable, false)); - /* serialize adap_enable */ - mutex_lock(&adap->devnode.lock); - if (adap->needs_hpd || list_empty(&adap->devnode.fhs)) { - WARN_ON(adap->ops->adap_enable(adap, false)); - adap->transmit_in_progress = false; - wake_up_interruptible(&adap->kthread_waitq); - } - mutex_unlock(&adap->devnode.lock); - if (phys_addr == CEC_PHYS_ADDR_INVALID) - return; - } - - /* serialize adap_enable */ - mutex_lock(&adap->devnode.lock); - adap->last_initiator = 0xff; - adap->transmit_in_progress = false; - - if (adap->needs_hpd || list_empty(&adap->devnode.fhs)) { - if (adap->ops->adap_enable(adap, true)) { - mutex_unlock(&adap->devnode.lock); + if (becomes_invalid) { + cec_adap_enable(adap); return; } } - if (adap->monitor_all_cnt && - call_op(adap, adap_monitor_all_enable, true)) { - if (adap->needs_hpd || list_empty(&adap->devnode.fhs)) - WARN_ON(adap->ops->adap_enable(adap, false)); - mutex_unlock(&adap->devnode.lock); - return; - } - mutex_unlock(&adap->devnode.lock); - adap->phys_addr = phys_addr; + if (is_invalid) + cec_adap_enable(adap); + cec_post_state_event(adap); 
if (adap->log_addrs.num_log_addrs) cec_claim_log_addrs(adap, block); @@ -1665,12 +1704,15 @@ int __cec_s_log_addrs(struct cec_adapter *adap, struct cec_log_addrs *log_addrs, bool block) { u16 type_mask = 0; + int err; int i; if (adap->devnode.unregistered) return -ENODEV; if (!log_addrs || log_addrs->num_log_addrs == 0) { + if (!adap->is_configuring && !adap->is_configured) + return 0; cec_adap_unconfigure(adap); adap->log_addrs.num_log_addrs = 0; for (i = 0; i < CEC_MAX_LOG_ADDRS; i++) @@ -1678,6 +1720,7 @@ int __cec_s_log_addrs(struct cec_adapter *adap, adap->log_addrs.osd_name[0] = '\0'; adap->log_addrs.vendor_id = CEC_VENDOR_ID_NONE; adap->log_addrs.cec_version = CEC_OP_CEC_VERSION_2_0; + cec_adap_enable(adap); return 0; } @@ -1813,9 +1856,10 @@ int __cec_s_log_addrs(struct cec_adapter *adap, log_addrs->log_addr_mask = adap->log_addrs.log_addr_mask; adap->log_addrs = *log_addrs; - if (adap->phys_addr != CEC_PHYS_ADDR_INVALID) + err = cec_adap_enable(adap); + if (!err && adap->phys_addr != CEC_PHYS_ADDR_INVALID) cec_claim_log_addrs(adap, block); - return 0; + return err; } int cec_s_log_addrs(struct cec_adapter *adap, @@ -1917,11 +1961,10 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, msg->msg[1] != CEC_MSG_CDC_MESSAGE) return 0; - if (adap->ops->received) { - /* Allow drivers to process the message first */ - if (adap->ops->received(adap, msg) != -ENOMSG) - return 0; - } + /* Allow drivers to process the message first */ + if (adap->ops->received && !adap->devnode.unregistered && + adap->ops->received(adap, msg) != -ENOMSG) + return 0; /* * REPORT_PHYSICAL_ADDR, CEC_MSG_USER_CONTROL_PRESSED and @@ -2114,20 +2157,25 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, */ int cec_monitor_all_cnt_inc(struct cec_adapter *adap) { - int ret = 0; + int ret; - if (adap->monitor_all_cnt == 0) - ret = call_op(adap, adap_monitor_all_enable, 1); - if (ret == 0) - adap->monitor_all_cnt++; + if 
(adap->monitor_all_cnt++) + return 0; + + ret = cec_adap_enable(adap); + if (ret) + adap->monitor_all_cnt--; return ret; } void cec_monitor_all_cnt_dec(struct cec_adapter *adap) { - adap->monitor_all_cnt--; - if (adap->monitor_all_cnt == 0) - WARN_ON(call_op(adap, adap_monitor_all_enable, 0)); + if (WARN_ON(!adap->monitor_all_cnt)) + return; + if (--adap->monitor_all_cnt) + return; + WARN_ON(call_op(adap, adap_monitor_all_enable, false)); + cec_adap_enable(adap); } /* @@ -2137,20 +2185,25 @@ void cec_monitor_all_cnt_dec(struct cec_adapter *adap) */ int cec_monitor_pin_cnt_inc(struct cec_adapter *adap) { - int ret = 0; + int ret; + + if (adap->monitor_pin_cnt++) + return 0; - if (adap->monitor_pin_cnt == 0) - ret = call_op(adap, adap_monitor_pin_enable, 1); - if (ret == 0) - adap->monitor_pin_cnt++; + ret = cec_adap_enable(adap); + if (ret) + adap->monitor_pin_cnt--; return ret; } void cec_monitor_pin_cnt_dec(struct cec_adapter *adap) { - adap->monitor_pin_cnt--; - if (adap->monitor_pin_cnt == 0) - WARN_ON(call_op(adap, adap_monitor_pin_enable, 0)); + if (WARN_ON(!adap->monitor_pin_cnt)) + return; + if (--adap->monitor_pin_cnt) + return; + WARN_ON(call_op(adap, adap_monitor_pin_enable, false)); + cec_adap_enable(adap); } #ifdef CONFIG_DEBUG_FS @@ -2164,6 +2217,7 @@ int cec_adap_status(struct seq_file *file, void *priv) struct cec_data *data; mutex_lock(&adap->lock); + seq_printf(file, "enabled: %d\n", adap->is_enabled); seq_printf(file, "configured: %d\n", adap->is_configured); seq_printf(file, "configuring: %d\n", adap->is_configuring); seq_printf(file, "phys_addr: %x.%x.%x.%x\n", @@ -2178,6 +2232,9 @@ int cec_adap_status(struct seq_file *file, void *priv) if (adap->monitor_all_cnt) seq_printf(file, "file handles in Monitor All mode: %u\n", adap->monitor_all_cnt); + if (adap->monitor_pin_cnt) + seq_printf(file, "file handles in Monitor Pin mode: %u\n", + adap->monitor_pin_cnt); if (adap->tx_timeouts) { seq_printf(file, "transmit timeouts: %u\n", adap->tx_timeouts); 
diff --git a/drivers/media/cec/core/cec-api.c b/drivers/media/cec/core/cec-api.c index 52c30e4e20..7f260f2cbb 100644 --- a/drivers/media/cec/core/cec-api.c +++ b/drivers/media/cec/core/cec-api.c @@ -178,7 +178,7 @@ static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh, CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU | CEC_LOG_ADDRS_FL_CDC_ONLY; mutex_lock(&adap->lock); - if (!adap->is_configuring && + if (!adap->is_claiming_log_addrs && !adap->is_configuring && (!log_addrs.num_log_addrs || !adap->is_configured) && !cec_is_busy(adap, fh)) { err = __cec_s_log_addrs(adap, &log_addrs, block); @@ -586,18 +586,6 @@ static int cec_open(struct inode *inode, struct file *filp) return err; } - /* serialize adap_enable */ - mutex_lock(&devnode->lock); - if (list_empty(&devnode->fhs) && - !adap->needs_hpd && - adap->phys_addr == CEC_PHYS_ADDR_INVALID) { - err = adap->ops->adap_enable(adap, true); - if (err) { - mutex_unlock(&devnode->lock); - kfree(fh); - return err; - } - } filp->private_data = fh; /* Queue up initial state events */ @@ -607,7 +595,8 @@ static int cec_open(struct inode *inode, struct file *filp) adap->conn_info.type != CEC_CONNECTOR_TYPE_NO_CONNECTOR; cec_queue_event_fh(fh, &ev, 0); #ifdef CONFIG_CEC_PIN - if (adap->pin && adap->pin->ops->read_hpd) { + if (adap->pin && adap->pin->ops->read_hpd && + !adap->devnode.unregistered) { err = adap->pin->ops->read_hpd(adap); if (err >= 0) { ev.event = err ? CEC_EVENT_PIN_HPD_HIGH : @@ -615,7 +604,8 @@ static int cec_open(struct inode *inode, struct file *filp) cec_queue_event_fh(fh, &ev, 0); } } - if (adap->pin && adap->pin->ops->read_5v) { + if (adap->pin && adap->pin->ops->read_5v && + !adap->devnode.unregistered) { err = adap->pin->ops->read_5v(adap); if (err >= 0) { ev.event = err ? 
CEC_EVENT_PIN_5V_HIGH : @@ -625,6 +615,7 @@ static int cec_open(struct inode *inode, struct file *filp) } #endif + mutex_lock(&devnode->lock); mutex_lock(&devnode->lock_fhs); list_add(&fh->list, &devnode->fhs); mutex_unlock(&devnode->lock_fhs); @@ -656,15 +647,10 @@ static int cec_release(struct inode *inode, struct file *filp) cec_monitor_all_cnt_dec(adap); mutex_unlock(&adap->lock); - /* serialize adap_enable */ mutex_lock(&devnode->lock); mutex_lock(&devnode->lock_fhs); list_del(&fh->list); mutex_unlock(&devnode->lock_fhs); - if (cec_is_registered(adap) && list_empty(&devnode->fhs) && - !adap->needs_hpd && adap->phys_addr == CEC_PHYS_ADDR_INVALID) { - WARN_ON(adap->ops->adap_enable(adap, false)); - } mutex_unlock(&devnode->lock); /* Unhook pending transmits from this filehandle. */ @@ -678,6 +664,8 @@ static int cec_release(struct inode *inode, struct file *filp) list_del(&data->xfer_list); } mutex_unlock(&adap->lock); + + mutex_lock(&fh->lock); while (!list_empty(&fh->msgs)) { struct cec_msg_entry *entry = list_first_entry(&fh->msgs, struct cec_msg_entry, list); @@ -695,6 +683,7 @@ static int cec_release(struct inode *inode, struct file *filp) kfree(entry); } } + mutex_unlock(&fh->lock); kfree(fh); cec_put_device(devnode); diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c index ec67065d52..34f1631b77 100644 --- a/drivers/media/cec/core/cec-core.c +++ b/drivers/media/cec/core/cec-core.c @@ -204,7 +204,7 @@ static ssize_t cec_error_inj_write(struct file *file, line = strsep(&p, "\n"); if (!*line || *line == '#') continue; - if (!adap->ops->error_inj_parse_line(adap, line)) { + if (!call_op(adap, error_inj_parse_line, line)) { kfree(buf); return -EINVAL; } @@ -217,7 +217,7 @@ static int cec_error_inj_show(struct seq_file *sf, void *unused) { struct cec_adapter *adap = sf->private; - return adap->ops->error_inj_show(adap, sf); + return call_op(adap, error_inj_show, sf); } static int cec_error_inj_open(struct inode *inode, struct 
file *file) diff --git a/drivers/media/cec/core/cec-pin-priv.h b/drivers/media/cec/core/cec-pin-priv.h index fb101f1586..e7d63f6acb 100644 --- a/drivers/media/cec/core/cec-pin-priv.h +++ b/drivers/media/cec/core/cec-pin-priv.h @@ -12,6 +12,17 @@ #include #include +#define call_pin_op(pin, op, arg...) \ + ((pin && pin->ops->op && !pin->adap->devnode.unregistered) ? \ + pin->ops->op(pin->adap, ## arg) : 0) + +#define call_void_pin_op(pin, op, arg...) \ + do { \ + if (pin && pin->ops->op && \ + !pin->adap->devnode.unregistered) \ + pin->ops->op(pin->adap, ## arg); \ + } while (0) + enum cec_pin_state { /* CEC is off */ CEC_ST_OFF, diff --git a/drivers/media/cec/core/cec-pin.c b/drivers/media/cec/core/cec-pin.c index 0eb90cc0ff..99e69c49e0 100644 --- a/drivers/media/cec/core/cec-pin.c +++ b/drivers/media/cec/core/cec-pin.c @@ -135,7 +135,7 @@ static void cec_pin_update(struct cec_pin *pin, bool v, bool force) static bool cec_pin_read(struct cec_pin *pin) { - bool v = pin->ops->read(pin->adap); + bool v = call_pin_op(pin, read); cec_pin_update(pin, v, false); return v; @@ -143,13 +143,13 @@ static bool cec_pin_read(struct cec_pin *pin) static void cec_pin_low(struct cec_pin *pin) { - pin->ops->low(pin->adap); + call_void_pin_op(pin, low); cec_pin_update(pin, false, false); } static bool cec_pin_high(struct cec_pin *pin) { - pin->ops->high(pin->adap); + call_void_pin_op(pin, high); return cec_pin_read(pin); } @@ -1086,7 +1086,7 @@ static int cec_pin_thread_func(void *_adap) CEC_PIN_IRQ_UNCHANGED)) { case CEC_PIN_IRQ_DISABLE: if (irq_enabled) { - pin->ops->disable_irq(adap); + call_void_pin_op(pin, disable_irq); irq_enabled = false; } cec_pin_high(pin); @@ -1097,7 +1097,7 @@ static int cec_pin_thread_func(void *_adap) case CEC_PIN_IRQ_ENABLE: if (irq_enabled) break; - pin->enable_irq_failed = !pin->ops->enable_irq(adap); + pin->enable_irq_failed = !call_pin_op(pin, enable_irq); if (pin->enable_irq_failed) { cec_pin_to_idle(pin); hrtimer_start(&pin->timer, ns_to_ktime(0), 
@@ -1112,8 +1112,8 @@ static int cec_pin_thread_func(void *_adap) if (kthread_should_stop()) break; } - if (pin->ops->disable_irq && irq_enabled) - pin->ops->disable_irq(adap); + if (irq_enabled) + call_void_pin_op(pin, disable_irq); hrtimer_cancel(&pin->timer); cec_pin_read(pin); cec_pin_to_idle(pin); @@ -1208,7 +1208,7 @@ static void cec_pin_adap_status(struct cec_adapter *adap, seq_printf(file, "state: %s\n", states[pin->state].name); seq_printf(file, "tx_bit: %d\n", pin->tx_bit); seq_printf(file, "rx_bit: %d\n", pin->rx_bit); - seq_printf(file, "cec pin: %d\n", pin->ops->read(adap)); + seq_printf(file, "cec pin: %d\n", call_pin_op(pin, read)); seq_printf(file, "cec pin events dropped: %u\n", pin->work_pin_events_dropped_cnt); seq_printf(file, "irq failed: %d\n", pin->enable_irq_failed); @@ -1261,8 +1261,7 @@ static void cec_pin_adap_status(struct cec_adapter *adap, pin->rx_data_bit_too_long_cnt = 0; pin->rx_low_drive_cnt = 0; pin->tx_low_drive_cnt = 0; - if (pin->ops->status) - pin->ops->status(adap, file); + call_void_pin_op(pin, status, file); } static int cec_pin_adap_monitor_all_enable(struct cec_adapter *adap, @@ -1278,7 +1277,7 @@ static void cec_pin_adap_free(struct cec_adapter *adap) { struct cec_pin *pin = adap->pin; - if (pin->ops->free) + if (pin && pin->ops->free) pin->ops->free(adap); adap->pin = NULL; kfree(pin); @@ -1288,7 +1287,7 @@ static int cec_pin_received(struct cec_adapter *adap, struct cec_msg *msg) { struct cec_pin *pin = adap->pin; - if (pin->ops->received) + if (pin->ops->received && !adap->devnode.unregistered) return pin->ops->received(adap, msg); return -ENOMSG; } diff --git a/drivers/media/cec/core/cec-priv.h b/drivers/media/cec/core/cec-priv.h index 9bbd05053d..b78df931aa 100644 --- a/drivers/media/cec/core/cec-priv.h +++ b/drivers/media/cec/core/cec-priv.h @@ -17,6 +17,16 @@ pr_info("cec-%s: " fmt, adap->name, ## arg); \ } while (0) +#define call_op(adap, op, arg...) \ + ((adap->ops->op && !adap->devnode.unregistered) ? 
\ + adap->ops->op(adap, ## arg) : 0) + +#define call_void_op(adap, op, arg...) \ + do { \ + if (adap->ops->op && !adap->devnode.unregistered) \ + adap->ops->op(adap, ## arg); \ + } while (0) + /* devnode to cec_adapter */ #define to_cec_adapter(node) container_of(node, struct cec_adapter, devnode) diff --git a/drivers/media/dvb-frontends/lgdt3306a.c b/drivers/media/dvb-frontends/lgdt3306a.c index f6e83a3873..79174336fa 100644 --- a/drivers/media/dvb-frontends/lgdt3306a.c +++ b/drivers/media/dvb-frontends/lgdt3306a.c @@ -2177,6 +2177,11 @@ static int lgdt3306a_probe(struct i2c_client *client, struct dvb_frontend *fe; int ret; + if (!client->dev.platform_data) { + dev_err(&client->dev, "platform data is mandatory\n"); + return -EINVAL; + } + config = kmemdup(client->dev.platform_data, sizeof(struct lgdt3306a_config), GFP_KERNEL); if (config == NULL) { diff --git a/drivers/media/dvb-frontends/mxl5xx.c b/drivers/media/dvb-frontends/mxl5xx.c index 0b00a23436..aaf9a17359 100644 --- a/drivers/media/dvb-frontends/mxl5xx.c +++ b/drivers/media/dvb-frontends/mxl5xx.c @@ -1390,57 +1390,57 @@ static int config_ts(struct mxl *state, enum MXL_HYDRA_DEMOD_ID_E demod_id, u32 nco_count_min = 0; u32 clk_type = 0; - struct MXL_REG_FIELD_T xpt_sync_polarity[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_sync_polarity[MXL_HYDRA_DEMOD_MAX] = { {0x90700010, 8, 1}, {0x90700010, 9, 1}, {0x90700010, 10, 1}, {0x90700010, 11, 1}, {0x90700010, 12, 1}, {0x90700010, 13, 1}, {0x90700010, 14, 1}, {0x90700010, 15, 1} }; - struct MXL_REG_FIELD_T xpt_clock_polarity[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_clock_polarity[MXL_HYDRA_DEMOD_MAX] = { {0x90700010, 16, 1}, {0x90700010, 17, 1}, {0x90700010, 18, 1}, {0x90700010, 19, 1}, {0x90700010, 20, 1}, {0x90700010, 21, 1}, {0x90700010, 22, 1}, {0x90700010, 23, 1} }; - struct MXL_REG_FIELD_T xpt_valid_polarity[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_valid_polarity[MXL_HYDRA_DEMOD_MAX] 
= { {0x90700014, 0, 1}, {0x90700014, 1, 1}, {0x90700014, 2, 1}, {0x90700014, 3, 1}, {0x90700014, 4, 1}, {0x90700014, 5, 1}, {0x90700014, 6, 1}, {0x90700014, 7, 1} }; - struct MXL_REG_FIELD_T xpt_ts_clock_phase[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_ts_clock_phase[MXL_HYDRA_DEMOD_MAX] = { {0x90700018, 0, 3}, {0x90700018, 4, 3}, {0x90700018, 8, 3}, {0x90700018, 12, 3}, {0x90700018, 16, 3}, {0x90700018, 20, 3}, {0x90700018, 24, 3}, {0x90700018, 28, 3} }; - struct MXL_REG_FIELD_T xpt_lsb_first[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_lsb_first[MXL_HYDRA_DEMOD_MAX] = { {0x9070000C, 16, 1}, {0x9070000C, 17, 1}, {0x9070000C, 18, 1}, {0x9070000C, 19, 1}, {0x9070000C, 20, 1}, {0x9070000C, 21, 1}, {0x9070000C, 22, 1}, {0x9070000C, 23, 1} }; - struct MXL_REG_FIELD_T xpt_sync_byte[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_sync_byte[MXL_HYDRA_DEMOD_MAX] = { {0x90700010, 0, 1}, {0x90700010, 1, 1}, {0x90700010, 2, 1}, {0x90700010, 3, 1}, {0x90700010, 4, 1}, {0x90700010, 5, 1}, {0x90700010, 6, 1}, {0x90700010, 7, 1} }; - struct MXL_REG_FIELD_T xpt_enable_output[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_enable_output[MXL_HYDRA_DEMOD_MAX] = { {0x9070000C, 0, 1}, {0x9070000C, 1, 1}, {0x9070000C, 2, 1}, {0x9070000C, 3, 1}, {0x9070000C, 4, 1}, {0x9070000C, 5, 1}, {0x9070000C, 6, 1}, {0x9070000C, 7, 1} }; - struct MXL_REG_FIELD_T xpt_err_replace_sync[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_err_replace_sync[MXL_HYDRA_DEMOD_MAX] = { {0x9070000C, 24, 1}, {0x9070000C, 25, 1}, {0x9070000C, 26, 1}, {0x9070000C, 27, 1}, {0x9070000C, 28, 1}, {0x9070000C, 29, 1}, {0x9070000C, 30, 1}, {0x9070000C, 31, 1} }; - struct MXL_REG_FIELD_T xpt_err_replace_valid[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_err_replace_valid[MXL_HYDRA_DEMOD_MAX] = { {0x90700014, 8, 1}, {0x90700014, 9, 1}, {0x90700014, 10, 1}, {0x90700014, 11, 1}, {0x90700014, 12, 1}, 
{0x90700014, 13, 1}, {0x90700014, 14, 1}, {0x90700014, 15, 1} }; - struct MXL_REG_FIELD_T xpt_continuous_clock[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_continuous_clock[MXL_HYDRA_DEMOD_MAX] = { {0x907001D4, 0, 1}, {0x907001D4, 1, 1}, {0x907001D4, 2, 1}, {0x907001D4, 3, 1}, {0x907001D4, 4, 1}, {0x907001D4, 5, 1}, {0x907001D4, 6, 1}, {0x907001D4, 7, 1} }; - struct MXL_REG_FIELD_T xpt_nco_clock_rate[MXL_HYDRA_DEMOD_MAX] = { + static const struct MXL_REG_FIELD_T xpt_nco_clock_rate[MXL_HYDRA_DEMOD_MAX] = { {0x90700044, 16, 80}, {0x90700044, 16, 81}, {0x90700044, 16, 82}, {0x90700044, 16, 83}, {0x90700044, 16, 84}, {0x90700044, 16, 85}, diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c index f11382afe2..f249199dc6 100644 --- a/drivers/media/mc/mc-devnode.c +++ b/drivers/media/mc/mc-devnode.c @@ -246,15 +246,14 @@ int __must_check media_devnode_register(struct media_device *mdev, kobject_set_name(&devnode->cdev.kobj, "media%d", devnode->minor); /* Part 3: Add the media and char device */ + set_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); ret = cdev_device_add(&devnode->cdev, &devnode->dev); if (ret < 0) { + clear_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); pr_err("%s: cdev_device_add failed\n", __func__); goto cdev_add_error; } - /* Part 4: Activate this minor. The char device can now be used. 
*/ - set_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); - return 0; cdev_add_error: diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c index 162ab08912..dfb2be0b96 100644 --- a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c +++ b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c @@ -102,26 +102,29 @@ static inline u32 cio2_bytesperline(const unsigned int width) static void cio2_fbpt_exit_dummy(struct cio2_device *cio2) { + struct device *dev = &cio2->pci_dev->dev; + if (cio2->dummy_lop) { - dma_free_coherent(&cio2->pci_dev->dev, PAGE_SIZE, - cio2->dummy_lop, cio2->dummy_lop_bus_addr); + dma_free_coherent(dev, PAGE_SIZE, cio2->dummy_lop, + cio2->dummy_lop_bus_addr); cio2->dummy_lop = NULL; } if (cio2->dummy_page) { - dma_free_coherent(&cio2->pci_dev->dev, PAGE_SIZE, - cio2->dummy_page, cio2->dummy_page_bus_addr); + dma_free_coherent(dev, PAGE_SIZE, cio2->dummy_page, + cio2->dummy_page_bus_addr); cio2->dummy_page = NULL; } } static int cio2_fbpt_init_dummy(struct cio2_device *cio2) { + struct device *dev = &cio2->pci_dev->dev; unsigned int i; - cio2->dummy_page = dma_alloc_coherent(&cio2->pci_dev->dev, PAGE_SIZE, + cio2->dummy_page = dma_alloc_coherent(dev, PAGE_SIZE, &cio2->dummy_page_bus_addr, GFP_KERNEL); - cio2->dummy_lop = dma_alloc_coherent(&cio2->pci_dev->dev, PAGE_SIZE, + cio2->dummy_lop = dma_alloc_coherent(dev, PAGE_SIZE, &cio2->dummy_lop_bus_addr, GFP_KERNEL); if (!cio2->dummy_page || !cio2->dummy_lop) { @@ -497,6 +500,7 @@ static int cio2_hw_init(struct cio2_device *cio2, struct cio2_queue *q) static void cio2_hw_exit(struct cio2_device *cio2, struct cio2_queue *q) { + struct device *dev = &cio2->pci_dev->dev; void __iomem *const base = cio2->base; unsigned int i; u32 value; @@ -514,8 +518,7 @@ static void cio2_hw_exit(struct cio2_device *cio2, struct cio2_queue *q) value, value & CIO2_CDMAC0_DMA_HALTED, 4000, 2000000); if (ret) - dev_err(&cio2->pci_dev->dev, - "DMA %i can not be halted\n", CIO2_DMA_CHAN); 
+ dev_err(dev, "DMA %i can not be halted\n", CIO2_DMA_CHAN); for (i = 0; i < CIO2_NUM_PORTS; i++) { writel(readl(base + CIO2_REG_PXM_FRF_CFG(i)) | @@ -539,8 +542,7 @@ static void cio2_buffer_done(struct cio2_device *cio2, unsigned int dma_chan) entry = &q->fbpt[q->bufs_first * CIO2_MAX_LOPS]; if (entry->first_entry.ctrl & CIO2_FBPT_CTRL_VALID) { - dev_warn(&cio2->pci_dev->dev, - "no ready buffers found on DMA channel %u\n", + dev_warn(dev, "no ready buffers found on DMA channel %u\n", dma_chan); return; } @@ -557,8 +559,7 @@ static void cio2_buffer_done(struct cio2_device *cio2, unsigned int dma_chan) q->bufs[q->bufs_first] = NULL; atomic_dec(&q->bufs_queued); - dev_dbg(&cio2->pci_dev->dev, - "buffer %i done\n", b->vbb.vb2_buf.index); + dev_dbg(dev, "buffer %i done\n", b->vbb.vb2_buf.index); b->vbb.vb2_buf.timestamp = ns; b->vbb.field = V4L2_FIELD_NONE; @@ -624,8 +625,8 @@ static const char *const cio2_port_errs[] = { static void cio2_irq_handle_once(struct cio2_device *cio2, u32 int_status) { - void __iomem *const base = cio2->base; struct device *dev = &cio2->pci_dev->dev; + void __iomem *const base = cio2->base; if (int_status & CIO2_INT_IOOE) { /* @@ -795,6 +796,7 @@ static int cio2_vb2_queue_setup(struct vb2_queue *vq, struct device *alloc_devs[]) { struct cio2_device *cio2 = vb2_get_drv_priv(vq); + struct device *dev = &cio2->pci_dev->dev; struct cio2_queue *q = vb2q_to_cio2_queue(vq); unsigned int i; @@ -802,7 +804,7 @@ static int cio2_vb2_queue_setup(struct vb2_queue *vq, for (i = 0; i < *num_planes; ++i) { sizes[i] = q->format.plane_fmt[i].sizeimage; - alloc_devs[i] = &cio2->pci_dev->dev; + alloc_devs[i] = dev; } *num_buffers = clamp_val(*num_buffers, 1, CIO2_MAX_BUFFERS); @@ -879,6 +881,7 @@ static int cio2_vb2_buf_init(struct vb2_buffer *vb) static void cio2_vb2_buf_queue(struct vb2_buffer *vb) { struct cio2_device *cio2 = vb2_get_drv_priv(vb->vb2_queue); + struct device *dev = &cio2->pci_dev->dev; struct cio2_queue *q = container_of(vb->vb2_queue, 
struct cio2_queue, vbq); struct cio2_buffer *b = @@ -889,7 +892,7 @@ static void cio2_vb2_buf_queue(struct vb2_buffer *vb) int bufs_queued = atomic_inc_return(&q->bufs_queued); u32 fbpt_rp; - dev_dbg(&cio2->pci_dev->dev, "queue buffer %d\n", vb->index); + dev_dbg(dev, "queue buffer %d\n", vb->index); /* * This code queues the buffer to the CIO2 DMA engine, which starts @@ -940,12 +943,12 @@ static void cio2_vb2_buf_queue(struct vb2_buffer *vb) return; } - dev_dbg(&cio2->pci_dev->dev, "entry %i was full!\n", next); + dev_dbg(dev, "entry %i was full!\n", next); next = (next + 1) % CIO2_MAX_BUFFERS; } local_irq_restore(flags); - dev_err(&cio2->pci_dev->dev, "error: all cio2 entries were full!\n"); + dev_err(dev, "error: all cio2 entries were full!\n"); atomic_dec(&q->bufs_queued); vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); } @@ -954,6 +957,7 @@ static void cio2_vb2_buf_queue(struct vb2_buffer *vb) static void cio2_vb2_buf_cleanup(struct vb2_buffer *vb) { struct cio2_device *cio2 = vb2_get_drv_priv(vb->vb2_queue); + struct device *dev = &cio2->pci_dev->dev; struct cio2_buffer *b = container_of(vb, struct cio2_buffer, vbb.vb2_buf); unsigned int i; @@ -961,7 +965,7 @@ static void cio2_vb2_buf_cleanup(struct vb2_buffer *vb) /* Free LOP table */ for (i = 0; i < CIO2_MAX_LOPS; i++) { if (b->lop[i]) - dma_free_coherent(&cio2->pci_dev->dev, PAGE_SIZE, + dma_free_coherent(dev, PAGE_SIZE, b->lop[i], b->lop_bus_addr[i]); } } @@ -970,14 +974,15 @@ static int cio2_vb2_start_streaming(struct vb2_queue *vq, unsigned int count) { struct cio2_queue *q = vb2q_to_cio2_queue(vq); struct cio2_device *cio2 = vb2_get_drv_priv(vq); + struct device *dev = &cio2->pci_dev->dev; int r; cio2->cur_queue = q; atomic_set(&q->frame_sequence, 0); - r = pm_runtime_resume_and_get(&cio2->pci_dev->dev); + r = pm_runtime_resume_and_get(dev); if (r < 0) { - dev_info(&cio2->pci_dev->dev, "failed to set power %d\n", r); + dev_info(dev, "failed to set power %d\n", r); return r; } @@ -1003,9 +1008,9 @@ static 
int cio2_vb2_start_streaming(struct vb2_queue *vq, unsigned int count) fail_hw: media_pipeline_stop(&q->vdev.entity); fail_pipeline: - dev_dbg(&cio2->pci_dev->dev, "failed to start streaming (%d)\n", r); + dev_dbg(dev, "failed to start streaming (%d)\n", r); cio2_vb2_return_all_buffers(q, VB2_BUF_STATE_QUEUED); - pm_runtime_put(&cio2->pci_dev->dev); + pm_runtime_put(dev); return r; } @@ -1014,16 +1019,16 @@ static void cio2_vb2_stop_streaming(struct vb2_queue *vq) { struct cio2_queue *q = vb2q_to_cio2_queue(vq); struct cio2_device *cio2 = vb2_get_drv_priv(vq); + struct device *dev = &cio2->pci_dev->dev; if (v4l2_subdev_call(q->sensor, video, s_stream, 0)) - dev_err(&cio2->pci_dev->dev, - "failed to stop sensor streaming\n"); + dev_err(dev, "failed to stop sensor streaming\n"); cio2_hw_exit(cio2, q); synchronize_irq(cio2->pci_dev->irq); cio2_vb2_return_all_buffers(q, VB2_BUF_STATE_ERROR); media_pipeline_stop(&q->vdev.entity); - pm_runtime_put(&cio2->pci_dev->dev); + pm_runtime_put(dev); cio2->streaming = false; } @@ -1315,12 +1320,12 @@ static int cio2_video_link_validate(struct media_link *link) struct video_device, entity); struct cio2_queue *q = container_of(vd, struct cio2_queue, vdev); struct cio2_device *cio2 = video_get_drvdata(vd); + struct device *dev = &cio2->pci_dev->dev; struct v4l2_subdev_format source_fmt; int ret; if (!media_entity_remote_pad(link->sink->entity->pads)) { - dev_info(&cio2->pci_dev->dev, - "video node %s pad not connected\n", vd->name); + dev_info(dev, "video node %s pad not connected\n", vd->name); return -ENOTCONN; } @@ -1330,8 +1335,7 @@ static int cio2_video_link_validate(struct media_link *link) if (source_fmt.format.width != q->format.width || source_fmt.format.height != q->format.height) { - dev_err(&cio2->pci_dev->dev, - "Wrong width or height %ux%u (%ux%u expected)\n", + dev_err(dev, "Wrong width or height %ux%u (%ux%u expected)\n", q->format.width, q->format.height, source_fmt.format.width, source_fmt.format.height); return 
-EINVAL; @@ -1412,6 +1416,7 @@ static int cio2_notifier_complete(struct v4l2_async_notifier *notifier) { struct cio2_device *cio2 = container_of(notifier, struct cio2_device, notifier); + struct device *dev = &cio2->pci_dev->dev; struct sensor_async_subdev *s_asd; struct v4l2_async_subdev *asd; struct cio2_queue *q; @@ -1428,8 +1433,7 @@ static int cio2_notifier_complete(struct v4l2_async_notifier *notifier) break; if (pad == q->sensor->entity.num_pads) { - dev_err(&cio2->pci_dev->dev, - "failed to find src pad for %s\n", + dev_err(dev, "failed to find src pad for %s\n", q->sensor->name); return -ENXIO; } @@ -1439,8 +1443,7 @@ static int cio2_notifier_complete(struct v4l2_async_notifier *notifier) &q->subdev.entity, CIO2_PAD_SINK, 0); if (ret) { - dev_err(&cio2->pci_dev->dev, - "failed to create link for %s\n", + dev_err(dev, "failed to create link for %s\n", q->sensor->name); return ret; } @@ -1457,6 +1460,7 @@ static const struct v4l2_async_notifier_operations cio2_async_ops = { static int cio2_parse_firmware(struct cio2_device *cio2) { + struct device *dev = &cio2->pci_dev->dev; unsigned int i; int ret; @@ -1467,10 +1471,8 @@ static int cio2_parse_firmware(struct cio2_device *cio2) struct sensor_async_subdev *s_asd; struct fwnode_handle *ep; - ep = fwnode_graph_get_endpoint_by_id( - dev_fwnode(&cio2->pci_dev->dev), i, 0, - FWNODE_GRAPH_ENDPOINT_NEXT); - + ep = fwnode_graph_get_endpoint_by_id(dev_fwnode(dev), i, 0, + FWNODE_GRAPH_ENDPOINT_NEXT); if (!ep) continue; @@ -1504,8 +1506,7 @@ static int cio2_parse_firmware(struct cio2_device *cio2) cio2->notifier.ops = &cio2_async_ops; ret = v4l2_async_notifier_register(&cio2->v4l2_dev, &cio2->notifier); if (ret) - dev_err(&cio2->pci_dev->dev, - "failed to register async notifier : %d\n", ret); + dev_err(dev, "failed to register async notifier : %d\n", ret); return ret; } @@ -1524,7 +1525,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) static const u32 default_width = 1936; static const 
u32 default_height = 1096; const struct ipu3_cio2_fmt dflt_fmt = formats[0]; - + struct device *dev = &cio2->pci_dev->dev; struct video_device *vdev = &q->vdev; struct vb2_queue *vbq = &q->vbq; struct v4l2_subdev *subdev = &q->subdev; @@ -1566,8 +1567,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) subdev->internal_ops = &cio2_subdev_internal_ops; r = media_entity_pads_init(&subdev->entity, CIO2_PADS, q->subdev_pads); if (r) { - dev_err(&cio2->pci_dev->dev, - "failed initialize subdev media entity (%d)\n", r); + dev_err(dev, "failed initialize subdev media entity (%d)\n", r); goto fail_subdev_media_entity; } @@ -1575,8 +1575,8 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) vdev->entity.ops = &cio2_video_entity_ops; r = media_entity_pads_init(&vdev->entity, 1, &q->vdev_pad); if (r) { - dev_err(&cio2->pci_dev->dev, - "failed initialize videodev media entity (%d)\n", r); + dev_err(dev, "failed initialize videodev media entity (%d)\n", + r); goto fail_vdev_media_entity; } @@ -1590,8 +1590,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) v4l2_set_subdevdata(subdev, cio2); r = v4l2_device_register_subdev(&cio2->v4l2_dev, subdev); if (r) { - dev_err(&cio2->pci_dev->dev, - "failed initialize subdev (%d)\n", r); + dev_err(dev, "failed initialize subdev (%d)\n", r); goto fail_subdev; } @@ -1607,8 +1606,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) vbq->lock = &q->lock; r = vb2_queue_init(vbq); if (r) { - dev_err(&cio2->pci_dev->dev, - "failed to initialize videobuf2 queue (%d)\n", r); + dev_err(dev, "failed to initialize videobuf2 queue (%d)\n", r); goto fail_subdev; } @@ -1625,8 +1623,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) video_set_drvdata(vdev, cio2); r = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (r) { - dev_err(&cio2->pci_dev->dev, - "failed to register video device (%d)\n", r); + dev_err(dev, 
"failed to register video device (%d)\n", r); goto fail_vdev; } @@ -1648,7 +1645,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q) fail_vdev_media_entity: media_entity_cleanup(&subdev->entity); fail_subdev_media_entity: - cio2_fbpt_exit(q, &cio2->pci_dev->dev); + cio2_fbpt_exit(q, dev); fail_fbpt: mutex_destroy(&q->subdev_lock); mutex_destroy(&q->lock); @@ -1715,11 +1712,12 @@ static int cio2_check_fwnode_graph(struct fwnode_handle *fwnode) static int cio2_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { - struct fwnode_handle *fwnode = dev_fwnode(&pci_dev->dev); + struct device *dev = &pci_dev->dev; + struct fwnode_handle *fwnode = dev_fwnode(dev); struct cio2_device *cio2; int r; - cio2 = devm_kzalloc(&pci_dev->dev, sizeof(*cio2), GFP_KERNEL); + cio2 = devm_kzalloc(dev, sizeof(*cio2), GFP_KERNEL); if (!cio2) return -ENOMEM; cio2->pci_dev = pci_dev; @@ -1732,7 +1730,7 @@ static int cio2_pci_probe(struct pci_dev *pci_dev, r = cio2_check_fwnode_graph(fwnode); if (r) { if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) { - dev_err(&pci_dev->dev, "fwnode graph has no endpoints connected\n"); + dev_err(dev, "fwnode graph has no endpoints connected\n"); return -EINVAL; } @@ -1743,16 +1741,16 @@ static int cio2_pci_probe(struct pci_dev *pci_dev, r = pcim_enable_device(pci_dev); if (r) { - dev_err(&pci_dev->dev, "failed to enable device (%d)\n", r); + dev_err(dev, "failed to enable device (%d)\n", r); return r; } - dev_info(&pci_dev->dev, "device 0x%x (rev: 0x%x)\n", + dev_info(dev, "device 0x%x (rev: 0x%x)\n", pci_dev->device, pci_dev->revision); r = pcim_iomap_regions(pci_dev, 1 << CIO2_PCI_BAR, pci_name(pci_dev)); if (r) { - dev_err(&pci_dev->dev, "failed to remap I/O memory (%d)\n", r); + dev_err(dev, "failed to remap I/O memory (%d)\n", r); return -ENODEV; } @@ -1764,13 +1762,13 @@ static int cio2_pci_probe(struct pci_dev *pci_dev, r = pci_set_dma_mask(pci_dev, CIO2_DMA_MASK); if (r) { - dev_err(&pci_dev->dev, "failed 
to set DMA mask (%d)\n", r); + dev_err(dev, "failed to set DMA mask (%d)\n", r); return -ENODEV; } r = pci_enable_msi(pci_dev); if (r) { - dev_err(&pci_dev->dev, "failed to enable MSI (%d)\n", r); + dev_err(dev, "failed to enable MSI (%d)\n", r); return r; } @@ -1780,7 +1778,7 @@ static int cio2_pci_probe(struct pci_dev *pci_dev, mutex_init(&cio2->lock); - cio2->media_dev.dev = &cio2->pci_dev->dev; + cio2->media_dev.dev = dev; strscpy(cio2->media_dev.model, CIO2_DEVICE_NAME, sizeof(cio2->media_dev.model)); snprintf(cio2->media_dev.bus_info, sizeof(cio2->media_dev.bus_info), @@ -1793,10 +1791,9 @@ static int cio2_pci_probe(struct pci_dev *pci_dev, goto fail_mutex_destroy; cio2->v4l2_dev.mdev = &cio2->media_dev; - r = v4l2_device_register(&pci_dev->dev, &cio2->v4l2_dev); + r = v4l2_device_register(dev, &cio2->v4l2_dev); if (r) { - dev_err(&pci_dev->dev, - "failed to register V4L2 device (%d)\n", r); + dev_err(dev, "failed to register V4L2 device (%d)\n", r); goto fail_media_device_unregister; } @@ -1806,20 +1803,20 @@ static int cio2_pci_probe(struct pci_dev *pci_dev, v4l2_async_notifier_init(&cio2->notifier); + r = devm_request_irq(dev, pci_dev->irq, cio2_irq, IRQF_SHARED, + CIO2_NAME, cio2); + if (r) { + dev_err(dev, "failed to request IRQ (%d)\n", r); + goto fail_clean_notifier; + } + /* Register notifier for subdevices we care */ r = cio2_parse_firmware(cio2); if (r) goto fail_clean_notifier; - r = devm_request_irq(&pci_dev->dev, pci_dev->irq, cio2_irq, - IRQF_SHARED, CIO2_NAME, cio2); - if (r) { - dev_err(&pci_dev->dev, "failed to request IRQ (%d)\n", r); - goto fail_clean_notifier; - } - - pm_runtime_put_noidle(&pci_dev->dev); - pm_runtime_allow(&pci_dev->dev); + pm_runtime_put_noidle(dev); + pm_runtime_allow(dev); return 0; @@ -2008,10 +2005,9 @@ static int __maybe_unused cio2_resume(struct device *dev) if (!cio2->streaming) return 0; /* Start stream */ - r = pm_runtime_force_resume(&cio2->pci_dev->dev); + r = pm_runtime_force_resume(dev); if (r < 0) { - 
dev_err(&cio2->pci_dev->dev, - "failed to set power %d\n", r); + dev_err(dev, "failed to set power %d\n", r); return r; } diff --git a/drivers/media/pci/ngene/ngene-core.c b/drivers/media/pci/ngene/ngene-core.c index 7481f553f9..24ec576dc3 100644 --- a/drivers/media/pci/ngene/ngene-core.c +++ b/drivers/media/pci/ngene/ngene-core.c @@ -1488,7 +1488,9 @@ static int init_channel(struct ngene_channel *chan) } if (dev->ci.en && (io & NGENE_IO_TSOUT)) { - dvb_ca_en50221_init(adapter, dev->ci.en, 0, 1); + ret = dvb_ca_en50221_init(adapter, dev->ci.en, 0, 1); + if (ret != 0) + goto err; set_transfer(chan, 1); chan->dev->channel[2].DataFormatFlags = DF_SWAP32; set_transfer(&chan->dev->channel[2], 1); diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c index 524912f20d..74bbdc11ab 100644 --- a/drivers/media/pci/sta2x11/sta2x11_vip.c +++ b/drivers/media/pci/sta2x11/sta2x11_vip.c @@ -760,7 +760,7 @@ static const struct video_device video_dev_template = { /** * vip_irq - interrupt routine * @irq: Number of interrupt ( not used, correct number is assumed ) - * @vip: local data structure containing all information + * @data: local data structure containing all information * * check for both frame interrupts set ( top and bottom ). * check FIFO overflow, but limit number of log messages after open. @@ -770,8 +770,9 @@ static const struct video_device video_dev_template = { * * IRQ_HANDLED, interrupt done. 
*/ -static irqreturn_t vip_irq(int irq, struct sta2x11_vip *vip) +static irqreturn_t vip_irq(int irq, void *data) { + struct sta2x11_vip *vip = data; unsigned int status; status = reg_read(vip, DVP_ITS); @@ -1053,9 +1054,7 @@ static int sta2x11_vip_init_one(struct pci_dev *pdev, spin_lock_init(&vip->slock); - ret = request_irq(pdev->irq, - (irq_handler_t) vip_irq, - IRQF_SHARED, KBUILD_MODNAME, vip); + ret = request_irq(pdev->irq, vip_irq, IRQF_SHARED, KBUILD_MODNAME, vip); if (ret) { dev_err(&pdev->dev, "request_irq failed\n"); ret = -ENODEV; diff --git a/drivers/media/radio/radio-shark2.c b/drivers/media/radio/radio-shark2.c index f1c5c0a6a3..e3e6aa87fe 100644 --- a/drivers/media/radio/radio-shark2.c +++ b/drivers/media/radio/radio-shark2.c @@ -62,7 +62,7 @@ struct shark_device { #ifdef SHARK_USE_LEDS struct work_struct led_work; struct led_classdev leds[NO_LEDS]; - char led_names[NO_LEDS][32]; + char led_names[NO_LEDS][64]; atomic_t brightness[NO_LEDS]; unsigned long brightness_new; #endif diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c index ef9af05200..849df4d1c5 100644 --- a/drivers/media/tuners/xc4000.c +++ b/drivers/media/tuners/xc4000.c @@ -1517,10 +1517,10 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc4000_priv *priv = fe->tuner_priv; + mutex_lock(&priv->lock); *freq = priv->freq_hz + priv->freq_offset; if (debug) { - mutex_lock(&priv->lock); if ((priv->cur_fw.type & (BASE | FM | DTV6 | DTV7 | DTV78 | DTV8)) == BASE) { u16 snr = 0; @@ -1531,8 +1531,8 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) return 0; } } - mutex_unlock(&priv->lock); } + mutex_unlock(&priv->lock); dprintk(1, "%s()\n", __func__); diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index 8ab1be03e7..0354614351 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -501,17 +501,21 @@ static int 
flexcop_usb_transfer_init(struct flexcop_usb *fc_usb) static int flexcop_usb_init(struct flexcop_usb *fc_usb) { - /* use the alternate setting with the larges buffer */ - int ret = usb_set_interface(fc_usb->udev, 0, 1); + struct usb_host_interface *alt; + int ret; + /* use the alternate setting with the largest buffer */ + ret = usb_set_interface(fc_usb->udev, 0, 1); if (ret) { err("set interface failed."); return ret; } - if (fc_usb->uintf->cur_altsetting->desc.bNumEndpoints < 1) + alt = fc_usb->uintf->cur_altsetting; + + if (alt->desc.bNumEndpoints < 2) return -ENODEV; - if (!usb_endpoint_is_isoc_in(&fc_usb->uintf->cur_altsetting->endpoint[0].desc)) + if (!usb_endpoint_is_isoc_in(&alt->endpoint[0].desc)) return -ENODEV; switch (fc_usb->udev->speed) { diff --git a/drivers/media/usb/stk1160/stk1160-video.c b/drivers/media/usb/stk1160/stk1160-video.c index 4cf540d1b2..2a5a90311e 100644 --- a/drivers/media/usb/stk1160/stk1160-video.c +++ b/drivers/media/usb/stk1160/stk1160-video.c @@ -99,7 +99,7 @@ void stk1160_buffer_done(struct stk1160 *dev) static inline void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len) { - int linesdone, lineoff, lencopy; + int linesdone, lineoff, lencopy, offset; int bytesperline = dev->width * 2; struct stk1160_buffer *buf = dev->isoc_ctl.buf; u8 *dst = buf->mem; @@ -139,8 +139,13 @@ void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len) * Check if we have enough space left in the buffer. * In that case, we force loop exit after copy. */ - if (lencopy > buf->bytesused - buf->length) { - lencopy = buf->bytesused - buf->length; + offset = dst - (u8 *)buf->mem; + if (offset > buf->length) { + dev_warn_ratelimited(dev->dev, "out of bounds offset\n"); + return; + } + if (lencopy > buf->length - offset) { + lencopy = buf->length - offset; remain = lencopy; } @@ -182,8 +187,13 @@ void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len) * Check if we have enough space left in the buffer. 
* In that case, we force loop exit after copy. */ - if (lencopy > buf->bytesused - buf->length) { - lencopy = buf->bytesused - buf->length; + offset = dst - (u8 *)buf->mem; + if (offset > buf->length) { + dev_warn_ratelimited(dev->dev, "offset out of bounds\n"); + return; + } + if (lencopy > buf->length - offset) { + lencopy = buf->length - offset; remain = lencopy; } diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index d03ace324d..e93b1d5c3a 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -1033,8 +1033,10 @@ int __video_register_device(struct video_device *vdev, vdev->dev.devt = MKDEV(VIDEO_MAJOR, vdev->minor); vdev->dev.parent = vdev->dev_parent; dev_set_name(&vdev->dev, "%s%d", name_base, vdev->num); + mutex_lock(&videodev_lock); ret = device_register(&vdev->dev); if (ret < 0) { + mutex_unlock(&videodev_lock); pr_err("%s: device_register failed\n", __func__); goto cleanup; } @@ -1054,6 +1056,7 @@ int __video_register_device(struct video_device *vdev, /* Part 6: Activate this minor. The char device can now be used. 
*/ set_bit(V4L2_FL_REGISTERED, &vdev->flags); + mutex_unlock(&videodev_lock); return 0; diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 305ffad131..02bea44369 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -585,6 +585,31 @@ static unsigned int at24_get_offset_adj(u8 flags, unsigned int byte_len) } } +static void at24_probe_temp_sensor(struct i2c_client *client) +{ + struct at24_data *at24 = i2c_get_clientdata(client); + struct i2c_board_info info = { .type = "jc42" }; + int ret; + u8 val; + + /* + * Byte 2 has value 11 for DDR3, earlier versions don't + * support the thermal sensor present flag + */ + ret = at24_read(at24, 2, &val, 1); + if (ret || val != 11) + return; + + /* Byte 32, bit 7 is set if temp sensor is present */ + ret = at24_read(at24, 32, &val, 1); + if (ret || !(val & BIT(7))) + return; + + info.addr = 0x18 | (client->addr & 7); + + i2c_new_client_device(client->adapter, &info); +} + static int at24_probe(struct i2c_client *client) { struct regmap_config regmap_config = { }; @@ -757,14 +782,6 @@ static int at24_probe(struct i2c_client *client) pm_runtime_set_active(dev); pm_runtime_enable(dev); - at24->nvmem = devm_nvmem_register(dev, &nvmem_config); - if (IS_ERR(at24->nvmem)) { - pm_runtime_disable(dev); - if (!pm_runtime_status_suspended(dev)) - regulator_disable(at24->vcc_reg); - return PTR_ERR(at24->nvmem); - } - /* * Perform a one-byte test read to verify that the * chip is functional. 
@@ -777,6 +794,19 @@ static int at24_probe(struct i2c_client *client) return -ENODEV; } + at24->nvmem = devm_nvmem_register(dev, &nvmem_config); + if (IS_ERR(at24->nvmem)) { + pm_runtime_disable(dev); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); + return dev_err_probe(dev, PTR_ERR(at24->nvmem), + "failed to register nvmem\n"); + } + + /* If this a SPD EEPROM, probe for DDR3 thermal sensor */ + if (cdata == &at24_data_spd) + at24_probe_temp_sensor(client); + pm_runtime_idle(dev); if (writable) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 6095195715..bbabfe49f9 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -112,6 +112,10 @@ #define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ #define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ +#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ +#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ + +#define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f2765d6b8c..188d847662 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -115,9 +115,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index f50d228824..a0ad1f3a69 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -234,7 
+234,8 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) dg_info->in_dg_host_queue = true; dg_info->entry = dst_entry; - memcpy(&dg_info->msg, dg, dg_size); + dg_info->msg = *dg; + memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); @@ -377,7 +378,8 @@ int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) dg_info->in_dg_host_queue = false; dg_info->entry = dst_entry; - memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg)); + dg_info->msg = *dg; + memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index c3202e2e99..68f820cd73 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -412,7 +412,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( struct mmc_blk_ioc_data *idata; int err; - idata = kmalloc(sizeof(*idata), GFP_KERNEL); + idata = kzalloc(sizeof(*idata), GFP_KERNEL); if (!idata) { err = -ENOMEM; goto out; @@ -485,7 +485,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->flags & MMC_BLK_IOC_DROP) return 0; - if (idata->flags & MMC_BLK_IOC_SBC) + if (idata->flags & MMC_BLK_IOC_SBC && i > 0) prev_idata = idatas[i - 1]; /* @@ -862,10 +862,11 @@ static const struct block_device_operations mmc_bdops = { static int mmc_blk_part_switch_pre(struct mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { if (card->ext_csd.cmdq_en) { ret = mmc_cmdq_disable(card); if (ret) @@ -880,10 +881,11 @@ static int mmc_blk_part_switch_pre(struct mmc_card *card, static int mmc_blk_part_switch_post(struct 
mmc_card *card, unsigned int part_type) { - const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB; + const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK; + const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB; int ret = 0; - if ((part_type & mask) == mask) { + if ((part_type & mask) == rpmb) { mmc_retune_unpause(card->host); if (card->reenable_cmdq && !card->ext_csd.cmdq_en) ret = mmc_cmdq_enable(card); diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 8303b48444..820a780e41 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -119,13 +119,12 @@ void mmc_retune_enable(struct mmc_host *host) /* * Pause re-tuning for a small set of operations. The pause begins after the - * next command and after first doing re-tuning. + * next command. */ void mmc_retune_pause(struct mmc_host *host) { if (!host->retune_paused) { host->retune_paused = 1; - mmc_retune_needed(host); mmc_retune_hold(host); } } diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 681653d097..04c510b751 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -202,6 +202,26 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, } EXPORT_SYMBOL(mmc_gpiod_request_cd); +/** + * mmc_gpiod_set_cd_config - set config for card-detection GPIO + * @host: mmc host + * @config: Generic pinconf config (from pinconf_to_config_packed()) + * + * This can be used by mmc host drivers to fixup a card-detection GPIO's config + * (e.g. set PIN_CONFIG_BIAS_PULL_UP) after acquiring the GPIO descriptor + * through mmc_gpiod_request_cd(). + * + * Returns: + * 0 on success, or a negative errno value on error. 
+ */ +int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config) +{ + struct mmc_gpio *ctx = host->slot.handler_priv; + + return gpiod_set_config(ctx->cd_gpio, config); +} +EXPORT_SYMBOL(mmc_gpiod_set_cd_config); + bool mmc_can_gpio_cd(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index f4e15eef70..bb8ea7bc19 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -82,6 +82,7 @@ struct sdhci_acpi_host { enum { DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP = BIT(0), DMI_QUIRK_SD_NO_WRITE_PROTECT = BIT(1), + DMI_QUIRK_SD_CD_ACTIVE_HIGH = BIT(2), }; static inline void *sdhci_acpi_priv(struct sdhci_acpi_host *c) @@ -795,7 +796,20 @@ static const struct acpi_device_id sdhci_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, sdhci_acpi_ids); +/* Please keep this list sorted alphabetically */ static const struct dmi_system_id sdhci_acpi_quirks[] = { + { + /* + * The Acer Aspire Switch 10 (SW5-012) microSD slot always + * reports the card being write-protected even though microSD + * cards do not have a write-protect switch at all. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"), + }, + .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT, + }, { /* * The Lenovo Miix 320-10ICR has a bug in the _PS0 method of @@ -812,15 +826,23 @@ static const struct dmi_system_id sdhci_acpi_quirks[] = { }, { /* - * The Acer Aspire Switch 10 (SW5-012) microSD slot always - * reports the card being write-protected even though microSD - * cards do not have a write-protect switch at all. + * Lenovo Yoga Tablet 2 Pro 1380F/L (13" Android version) this + * has broken WP reporting and an inverted CD signal. 
+ * Note this has more or less the same BIOS as the Lenovo Yoga + * Tablet 2 830F/L or 1050F/L (8" and 10" Android), but unlike + * the 830 / 1050 models which share the same mainboard this + * model has a different mainboard and the inverted CD and + * broken WP are unique to this board. */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"), + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."), + DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"), + DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"), + /* Full match so as to NOT match the 830/1050 BIOS */ + DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21.X64.0005.R00.1504101516"), }, - .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT, + .driver_data = (void *)(DMI_QUIRK_SD_NO_WRITE_PROTECT | + DMI_QUIRK_SD_CD_ACTIVE_HIGH), }, { /* @@ -833,6 +855,17 @@ static const struct dmi_system_id sdhci_acpi_quirks[] = { }, .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT, }, + { + /* + * The Toshiba WT10-A's microSD slot always reports the card being + * write-protected. 
+ */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "TOSHIBA WT10-A"), + }, + .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT, + }, {} /* Terminating entry */ }; @@ -947,6 +980,9 @@ static int sdhci_acpi_probe(struct platform_device *pdev) if (sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD)) { bool v = sdhci_acpi_flag(c, SDHCI_ACPI_SD_CD_OVERRIDE_LEVEL); + if (quirks & DMI_QUIRK_SD_CD_ACTIVE_HIGH) + host->mmc->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH; + err = mmc_gpiod_request_cd(host->mmc, NULL, 0, v, 0); if (err) { if (err == -EPROBE_DEFER) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 6537a84526..943fc7b7f4 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2827,6 +2827,11 @@ static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = true; + spin_unlock_irqrestore(&host->lock, flags); /* Drop the performance vote */ dev_pm_opp_set_rate(dev, 0); @@ -2841,6 +2846,7 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; int ret; ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks), @@ -2859,7 +2865,15 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) dev_pm_opp_set_rate(dev, msm_host->clk_rate); - return sdhci_msm_ice_resume(msm_host); + ret = sdhci_msm_ice_resume(msm_host); + if (ret) + return ret; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = false; + spin_unlock_irqrestore(&host->lock, flags); + + return ret; } static const struct dev_pm_ops 
sdhci_msm_pm_ops = { diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index 210701e4fc..230b61902e 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -140,19 +140,26 @@ static const struct timing_data td[] = { struct sdhci_am654_data { struct regmap *base; - bool legacy_otapdly; int otap_del_sel[ARRAY_SIZE(td)]; int itap_del_sel[ARRAY_SIZE(td)]; + u32 itap_del_ena[ARRAY_SIZE(td)]; int clkbuf_sel; int trm_icp; int drv_strength; int strb_sel; u32 flags; u32 quirks; + bool dll_enable; #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) }; +struct window { + u8 start; + u8 end; + u8 length; +}; + struct sdhci_am654_driver_data { const struct sdhci_pltfm_data *pdata; u32 flags; @@ -232,11 +239,13 @@ static void sdhci_am654_setup_dll(struct sdhci_host *host, unsigned int clock) } static void sdhci_am654_write_itapdly(struct sdhci_am654_data *sdhci_am654, - u32 itapdly) + u32 itapdly, u32 enable) { /* Set ITAPCHGWIN before writing to ITAPDLY */ regmap_update_bits(sdhci_am654->base, PHY_CTRL4, ITAPCHGWIN_MASK, 1 << ITAPCHGWIN_SHIFT); + regmap_update_bits(sdhci_am654->base, PHY_CTRL4, ITAPDLYENA_MASK, + enable << ITAPDLYENA_SHIFT); regmap_update_bits(sdhci_am654->base, PHY_CTRL4, ITAPDLYSEL_MASK, itapdly << ITAPDLYSEL_SHIFT); regmap_update_bits(sdhci_am654->base, PHY_CTRL4, ITAPCHGWIN_MASK, 0); @@ -253,8 +262,8 @@ static void sdhci_am654_setup_delay_chain(struct sdhci_am654_data *sdhci_am654, mask = SELDLYTXCLK_MASK | SELDLYRXCLK_MASK; regmap_update_bits(sdhci_am654->base, PHY_CTRL5, mask, val); - sdhci_am654_write_itapdly(sdhci_am654, - sdhci_am654->itap_del_sel[timing]); + sdhci_am654_write_itapdly(sdhci_am654, sdhci_am654->itap_del_sel[timing], + sdhci_am654->itap_del_ena[timing]); } static void sdhci_am654_set_clock(struct sdhci_host *host, unsigned int clock) @@ -263,7 +272,6 @@ static void sdhci_am654_set_clock(struct sdhci_host *host, unsigned int clock) struct sdhci_am654_data *sdhci_am654 = 
sdhci_pltfm_priv(pltfm_host); unsigned char timing = host->mmc->ios.timing; u32 otap_del_sel; - u32 otap_del_ena; u32 mask, val; regmap_update_bits(sdhci_am654->base, PHY_CTRL1, ENDLL_MASK, 0); @@ -271,15 +279,10 @@ static void sdhci_am654_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_set_clock(host, clock); /* Setup DLL Output TAP delay */ - if (sdhci_am654->legacy_otapdly) - otap_del_sel = sdhci_am654->otap_del_sel[0]; - else - otap_del_sel = sdhci_am654->otap_del_sel[timing]; - - otap_del_ena = (timing > MMC_TIMING_UHS_SDR25) ? 1 : 0; + otap_del_sel = sdhci_am654->otap_del_sel[timing]; mask = OTAPDLYENA_MASK | OTAPDLYSEL_MASK; - val = (otap_del_ena << OTAPDLYENA_SHIFT) | + val = (0x1 << OTAPDLYENA_SHIFT) | (otap_del_sel << OTAPDLYSEL_SHIFT); /* Write to STRBSEL for HS400 speed mode */ @@ -294,10 +297,21 @@ static void sdhci_am654_set_clock(struct sdhci_host *host, unsigned int clock) regmap_update_bits(sdhci_am654->base, PHY_CTRL4, mask, val); - if (timing > MMC_TIMING_UHS_SDR25 && clock >= CLOCK_TOO_SLOW_HZ) + if (timing > MMC_TIMING_UHS_SDR25 && clock >= CLOCK_TOO_SLOW_HZ) { sdhci_am654_setup_dll(host, clock); - else + sdhci_am654->dll_enable = true; + + if (timing == MMC_TIMING_MMC_HS400) { + sdhci_am654->itap_del_ena[timing] = 0x1; + sdhci_am654->itap_del_sel[timing] = sdhci_am654->itap_del_sel[timing - 1]; + } + + sdhci_am654_write_itapdly(sdhci_am654, sdhci_am654->itap_del_sel[timing], + sdhci_am654->itap_del_ena[timing]); + } else { sdhci_am654_setup_delay_chain(sdhci_am654, timing); + sdhci_am654->dll_enable = false; + } regmap_update_bits(sdhci_am654->base, PHY_CTRL5, CLKBUFSEL_MASK, sdhci_am654->clkbuf_sel); @@ -310,19 +324,29 @@ static void sdhci_j721e_4bit_set_clock(struct sdhci_host *host, struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); unsigned char timing = host->mmc->ios.timing; u32 otap_del_sel; + u32 itap_del_ena; + u32 itap_del_sel; u32 mask, val; /* Setup DLL Output TAP delay */ - if 
(sdhci_am654->legacy_otapdly) - otap_del_sel = sdhci_am654->otap_del_sel[0]; - else - otap_del_sel = sdhci_am654->otap_del_sel[timing]; + otap_del_sel = sdhci_am654->otap_del_sel[timing]; mask = OTAPDLYENA_MASK | OTAPDLYSEL_MASK; val = (0x1 << OTAPDLYENA_SHIFT) | (otap_del_sel << OTAPDLYSEL_SHIFT); - regmap_update_bits(sdhci_am654->base, PHY_CTRL4, mask, val); + /* Setup Input TAP delay */ + itap_del_ena = sdhci_am654->itap_del_ena[timing]; + itap_del_sel = sdhci_am654->itap_del_sel[timing]; + + mask |= ITAPDLYENA_MASK | ITAPDLYSEL_MASK; + val |= (itap_del_ena << ITAPDLYENA_SHIFT) | + (itap_del_sel << ITAPDLYSEL_SHIFT); + + regmap_update_bits(sdhci_am654->base, PHY_CTRL4, ITAPCHGWIN_MASK, + 1 << ITAPCHGWIN_SHIFT); + regmap_update_bits(sdhci_am654->base, PHY_CTRL4, mask, val); + regmap_update_bits(sdhci_am654->base, PHY_CTRL4, ITAPCHGWIN_MASK, 0); regmap_update_bits(sdhci_am654->base, PHY_CTRL5, CLKBUFSEL_MASK, sdhci_am654->clkbuf_sel); @@ -415,40 +439,105 @@ static u32 sdhci_am654_cqhci_irq(struct sdhci_host *host, u32 intmask) return 0; } -#define ITAP_MAX 32 +#define ITAPDLY_LENGTH 32 +#define ITAPDLY_LAST_INDEX (ITAPDLY_LENGTH - 1) + +static u32 sdhci_am654_calculate_itap(struct sdhci_host *host, struct window + *fail_window, u8 num_fails, bool circular_buffer) +{ + u8 itap = 0, start_fail = 0, end_fail = 0, pass_length = 0; + u8 first_fail_start = 0, last_fail_end = 0; + struct device *dev = mmc_dev(host->mmc); + struct window pass_window = {0, 0, 0}; + int prev_fail_end = -1; + u8 i; + + if (!num_fails) + return ITAPDLY_LAST_INDEX >> 1; + + if (fail_window->length == ITAPDLY_LENGTH) { + dev_err(dev, "No passing ITAPDLY, return 0\n"); + return 0; + } + + first_fail_start = fail_window->start; + last_fail_end = fail_window[num_fails - 1].end; + + for (i = 0; i < num_fails; i++) { + start_fail = fail_window[i].start; + end_fail = fail_window[i].end; + pass_length = start_fail - (prev_fail_end + 1); + + if (pass_length > pass_window.length) { + pass_window.start = 
prev_fail_end + 1; + pass_window.length = pass_length; + } + prev_fail_end = end_fail; + } + + if (!circular_buffer) + pass_length = ITAPDLY_LAST_INDEX - last_fail_end; + else + pass_length = ITAPDLY_LAST_INDEX - last_fail_end + first_fail_start; + + if (pass_length > pass_window.length) { + pass_window.start = last_fail_end + 1; + pass_window.length = pass_length; + } + + if (!circular_buffer) + itap = pass_window.start + (pass_window.length >> 1); + else + itap = (pass_window.start + (pass_window.length >> 1)) % ITAPDLY_LENGTH; + + return (itap > ITAPDLY_LAST_INDEX) ? ITAPDLY_LAST_INDEX >> 1 : itap; +} + static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host, u32 opcode) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); - int cur_val, prev_val = 1, fail_len = 0, pass_window = 0, pass_len; - u32 itap; + unsigned char timing = host->mmc->ios.timing; + struct window fail_window[ITAPDLY_LENGTH]; + u8 curr_pass, itap; + u8 fail_index = 0; + u8 prev_pass = 1; + + memset(fail_window, 0, sizeof(fail_window)); /* Enable ITAPDLY */ - regmap_update_bits(sdhci_am654->base, PHY_CTRL4, ITAPDLYENA_MASK, - 1 << ITAPDLYENA_SHIFT); + sdhci_am654->itap_del_ena[timing] = 0x1; + + for (itap = 0; itap < ITAPDLY_LENGTH; itap++) { + sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]); - for (itap = 0; itap < ITAP_MAX; itap++) { - sdhci_am654_write_itapdly(sdhci_am654, itap); + curr_pass = !mmc_send_tuning(host->mmc, opcode, NULL); - cur_val = !mmc_send_tuning(host->mmc, opcode, NULL); - if (cur_val && !prev_val) - pass_window = itap; + if (!curr_pass && prev_pass) + fail_window[fail_index].start = itap; + + if (!curr_pass) { + fail_window[fail_index].end = itap; + fail_window[fail_index].length++; + } - if (!cur_val) - fail_len++; + if (curr_pass && !prev_pass) + fail_index++; - prev_val = cur_val; + prev_pass = curr_pass; } - /* - * Having determined the length of 
the failing window and start of - * the passing window calculate the length of the passing window and - * set the final value halfway through it considering the range as a - * circular buffer - */ - pass_len = ITAP_MAX - fail_len; - itap = (pass_window + (pass_len >> 1)) % ITAP_MAX; - sdhci_am654_write_itapdly(sdhci_am654, itap); + + if (fail_window[fail_index].length != 0) + fail_index++; + + itap = sdhci_am654_calculate_itap(host, fail_window, fail_index, + sdhci_am654->dll_enable); + + sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]); + + /* Save ITAPDLY */ + sdhci_am654->itap_del_sel[timing] = itap; return 0; } @@ -579,32 +668,15 @@ static int sdhci_am654_get_otap_delay(struct sdhci_host *host, int i; int ret; - ret = device_property_read_u32(dev, td[MMC_TIMING_LEGACY].otap_binding, - &sdhci_am654->otap_del_sel[MMC_TIMING_LEGACY]); - if (ret) { - /* - * ti,otap-del-sel-legacy is mandatory, look for old binding - * if not found. - */ - ret = device_property_read_u32(dev, "ti,otap-del-sel", - &sdhci_am654->otap_del_sel[0]); - if (ret) { - dev_err(dev, "Couldn't find otap-del-sel\n"); - - return ret; - } - - dev_info(dev, "Using legacy binding ti,otap-del-sel\n"); - sdhci_am654->legacy_otapdly = true; - - return 0; - } - for (i = MMC_TIMING_LEGACY; i <= MMC_TIMING_MMC_HS400; i++) { ret = device_property_read_u32(dev, td[i].otap_binding, &sdhci_am654->otap_del_sel[i]); if (ret) { + if (i == MMC_TIMING_LEGACY) { + dev_err(dev, "Couldn't find mandatory ti,otap-del-sel-legacy\n"); + return ret; + } dev_dbg(dev, "Couldn't find %s\n", td[i].otap_binding); /* @@ -617,9 +689,12 @@ static int sdhci_am654_get_otap_delay(struct sdhci_host *host, host->mmc->caps2 &= ~td[i].capability; } - if (td[i].itap_binding) - device_property_read_u32(dev, td[i].itap_binding, - &sdhci_am654->itap_del_sel[i]); + if (td[i].itap_binding) { + ret = device_property_read_u32(dev, td[i].itap_binding, + &sdhci_am654->itap_del_sel[i]); + if (!ret) + 
sdhci_am654->itap_del_ena[i] = 0x1; + } } return 0; diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 437048bb80..5024cae411 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -259,6 +259,8 @@ static void tmio_mmc_reset_work(struct work_struct *work) else mrq->cmd->error = -ETIMEDOUT; + /* No new calls yet, but disallow concurrent tmio_mmc_done_work() */ + host->mrq = ERR_PTR(-EBUSY); host->cmd = NULL; host->data = NULL; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 2a228ee326..16077e5a2d 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -886,8 +886,10 @@ static int mtd_otp_nvmem_add(struct mtd_info *mtd) if (mtd->_get_user_prot_info && mtd->_read_user_prot_reg) { size = mtd_otp_size(mtd, true); - if (size < 0) - return size; + if (size < 0) { + err = size; + goto err; + } if (size > 0) { nvmem = mtd_otp_nvmem_register(mtd, "user-otp", size, diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 5d2ddb037a..2068025d56 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -53,7 +53,7 @@ static unsigned long doc_locations[] __initdata = { 0xe8000, 0xea000, 0xec000, 0xee000, #endif #endif - 0xffffffff }; +}; static struct mtd_info *doclist = NULL; @@ -1552,7 +1552,7 @@ static int __init init_nanddoc(void) if (ret < 0) return ret; } else { - for (i = 0; (doc_locations[i] != 0xffffffff); i++) { + for (i = 0; i < ARRAY_SIZE(doc_locations); i++) { doc_probe(doc_locations[i]); } } diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 9d44196532..9198d54cb3 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -59,7 +59,7 @@ #define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages) \ ( \ (cmd_dir) | \ - ((ran) << 19) | \ + (ran) | \ ((bch) << 14) | \ ((short_mode) << 13) | \ (((page_size) & 0x7f) << 6) | \ diff --git 
a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c index a9f50c9af1..856b3d6ece 100644 --- a/drivers/mtd/nand/raw/nand_hynix.c +++ b/drivers/mtd/nand/raw/nand_hynix.c @@ -402,7 +402,7 @@ static int hynix_nand_rr_init(struct nand_chip *chip) if (ret) pr_warn("failed to initialize read-retry infrastructure"); - return 0; + return ret; } static void hynix_nand_extract_oobsize(struct nand_chip *chip, diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 6e95c4b147..8081fc760d 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -86,9 +86,10 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi) sizeof(struct ubi_fm_scan_pool) + sizeof(struct ubi_fm_scan_pool) + (ubi->peb_count * sizeof(struct ubi_fm_ec)) + - (sizeof(struct ubi_fm_eba) + - (ubi->peb_count * sizeof(__be32))) + - sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES; + ((sizeof(struct ubi_fm_eba) + + sizeof(struct ubi_fm_volhdr)) * + (UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT)) + + (ubi->peb_count * sizeof(__be32)); return roundup(size, ubi->leb_size); } diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index f700f0e4f2..6e5489e233 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -791,6 +791,12 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) * The number of supported volumes is limited by the eraseblock size * and by the UBI_MAX_VOLUMES constant. 
*/ + + if (ubi->leb_size < UBI_VTBL_RECORD_SIZE) { + ubi_err(ubi, "LEB size too small for a volume record"); + return -EINVAL; + } + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; if (ubi->vtbl_slots > UBI_MAX_VOLUMES) ubi->vtbl_slots = UBI_MAX_VOLUMES; diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 50e60852f1..e5ed9dff10 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -46,7 +46,9 @@ obj-$(CONFIG_ARCNET) += arcnet/ obj-$(CONFIG_DEV_APPLETALK) += appletalk/ obj-$(CONFIG_CAIF) += caif/ obj-$(CONFIG_CAN) += can/ -obj-$(CONFIG_NET_DSA) += dsa/ +ifdef CONFIG_NET_DSA +obj-y += dsa/ +endif obj-$(CONFIG_ETHERNET) += ethernet/ obj-$(CONFIG_FDDI) += fddi/ obj-$(CONFIG_HIPPI) += hippi/ diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 14c47e614d..86db6a18c8 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -415,6 +415,20 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } +/* If port 6 is available as a CPU port, always prefer that as the default, + * otherwise don't care. + */ +static struct dsa_port * +mt753x_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp = dsa_to_port(ds, 6); + + if (dsa_port_is_cpu(cpu_dp)) + return cpu_dp; + + return NULL; +} + /* Setup port 6 interface mode and TRGMII TX circuit */ static int mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) @@ -994,20 +1008,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface) mutex_unlock(&priv->reg_mutex); } -/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std - * 802.1Qâ„¢-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA - * must only be propagated to C-VLAN and MAC Bridge components. That means - * VLAN-aware and VLAN-unaware bridges. 
On the switch designs with CPU ports, - * these frames are supposed to be processed by the CPU (software). So we make - * the switch only forward them to the CPU port. And if received from a CPU - * port, forward to a single port. The software is responsible of making the - * switch conform to the latter by setting a single port as destination port on - * the special tag. +/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL) + * of the Open Systems Interconnection basic reference model (OSI/RM) are + * described; the medium access control (MAC) and logical link control (LLC) + * sublayers. The MAC sublayer is the one facing the physical layer. + * + * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A + * Bridge component comprises a MAC Relay Entity for interconnecting the Ports + * of the Bridge, at least two Ports, and higher layer entities with at least a + * Spanning Tree Protocol Entity included. + * + * Each Bridge Port also functions as an end station and shall provide the MAC + * Service to an LLC Entity. Each instance of the MAC Service is provided to a + * distinct LLC Entity that supports protocol identification, multiplexing, and + * demultiplexing, for protocol data unit (PDU) transmission and reception by + * one or more higher layer entities. + * + * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC + * Entity associated with each Bridge Port is modeled as being directly + * connected to the attached Local Area Network (LAN). + * + * On the switch with CPU port architecture, CPU port functions as Management + * Port, and the Management Port functionality is provided by software which + * functions as an end station. Software is connected to an IEEE 802 LAN that is + * wholly contained within the system that incorporates the Bridge. 
Software + * provides access to the LLC Entity associated with each Bridge Port by the + * value of the source port field on the special tag on the frame received by + * software. + * + * We call frames that carry control information to determine the active + * topology and current extent of each Virtual Local Area Network (VLAN), i.e., + * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration + * Protocol Data Units (MVRPDUs), and frames from other link constrained + * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and + * Link Layer Discovery Protocol (LLDP), link-local frames. They are not + * forwarded by a Bridge. Permanently configured entries in the filtering + * database (FDB) ensure that such frames are discarded by the Forwarding + * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail: + * + * Each of the reserved MAC addresses specified in Table 8-1 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be + * permanently configured in the FDB in C-VLAN components and ERs. + * + * Each of the reserved MAC addresses specified in Table 8-2 + * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently + * configured in the FDB in S-VLAN components. + * + * Each of the reserved MAC addresses specified in Table 8-3 + * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in + * TPMR components. + * + * The FDB entries for reserved MAC addresses shall specify filtering for all + * Bridge Ports and all VIDs. Management shall not provide the capability to + * modify or remove entries for reserved MAC addresses. 
+ * + * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of + * propagation of PDUs within a Bridged Network, as follows: + * + * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no + * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN) + * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward. + * PDUs transmitted using this destination address, or any other addresses + * that appear in Table 8-1, Table 8-2, and Table 8-3 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can + * therefore travel no further than those stations that can be reached via a + * single individual LAN from the originating station. + * + * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an + * address that no conformant S-VLAN component, C-VLAN component, or MAC + * Bridge can forward; however, this address is relayed by a TPMR component. + * PDUs using this destination address, or any of the other addresses that + * appear in both Table 8-1 and Table 8-2 but not in Table 8-3 + * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by + * any TPMRs but will propagate no further than the nearest S-VLAN component, + * C-VLAN component, or MAC Bridge. + * + * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address + * that no conformant C-VLAN component, MAC Bridge can forward; however, it is + * relayed by TPMR components and S-VLAN components. PDUs using this + * destination address, or any of the other addresses that appear in Table 8-1 + * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]), + * will be relayed by TPMR components and S-VLAN components but will propagate + * no further than the nearest C-VLAN component or MAC Bridge. + * + * Because the LLC Entity associated with each Bridge Port is provided via CPU + * port, we must not filter these frames but forward them to CPU port. 
+ * + * In a Bridge, the transmission Port is majorly decided by ingress and egress + * rules, FDB, and spanning tree Port State functions of the Forwarding Process. + * For link-local frames, only CPU port should be designated as destination port + * in the FDB, and the other functions of the Forwarding Process must not + * interfere with the decision of the transmission Port. We call this process + * trapping frames to CPU port. + * + * Therefore, on the switch with CPU port architecture, link-local frames must + * be trapped to CPU port, and certain link-local frames received by a Port of a + * Bridge comprising a TPMR component or an S-VLAN component must be excluded + * from it. + * + * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port + * MAC Relay (TPMR) component as a TPMR component supports only a subset of the + * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port + * doesn't count) of this architecture will either function as a standard MAC + * Bridge or a standard VLAN Bridge. + * + * Therefore, a Bridge of this architecture can only comprise S-VLAN components, + * C-VLAN components, or MAC Bridge components. Since there's no TPMR component, + * we don't need to relay PDUs using the destination addresses specified on the + * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge + * section where they must be relayed by TPMR components. + * + * One option to trap link-local frames to CPU port is to add static FDB entries + * with CPU port designated as destination port. However, because that + * Independent VLAN Learning (IVL) is being used on every VID, each entry only + * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC + * Bridge component or a C-VLAN component, there would have to be 16 times 4096 + * entries. This switch intellectual property can only hold a maximum of 2048 + * entries. 
Using this option, there also isn't a mechanism to prevent + * link-local frames from being discarded when the spanning tree Port State of + * the reception Port is discarding. + * + * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4 + * registers. Whilst this applies to every VID, it doesn't contain all of the + * reserved MAC addresses without affecting the remaining Standard Group MAC + * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the + * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination + * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF + * destination addresses which may be relayed by MAC Bridges or VLAN Bridges. + * The latter option provides better but not complete conformance. + * + * This switch intellectual property also does not provide a mechanism to trap + * link-local frames with specific destination addresses to CPU port by Bridge, + * to conform to the filtering rules for the distinct Bridge components. * - * This switch intellectual property cannot conform to this part of the standard - * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC - * DAs, it also includes :22-FF which the scope of propagation is not supposed - * to be restricted for these MAC DAs. 
+ * Therefore, regardless of the type of the Bridge component, link-local frames + * with these destination addresses will be trapped to CPU port: + * + * 01-80-C2-00-00-[00,01,02,03,0E] + * + * In a Bridge comprising a MAC Bridge component or a C-VLAN component: + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] + * + * In a Bridge comprising an S-VLAN component: + * + * Link-local frames with these destination addresses will be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-00 + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A] + * + * To trap link-local frames to CPU port as conformant as this switch + * intellectual property can allow, link-local frames are made to be regarded as + * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual + * property only lets the frames regarded as BPDUs bypass the spanning tree Port + * State function of the Forwarding Process. + * + * The only remaining interference is the ingress rules. When the reception Port + * has no PVID assigned on software, VLAN-untagged frames won't be allowed in. + * There doesn't seem to be a mechanism on the switch intellectual property to + * have link-local frames bypass this function of the Forwarding Process. */ static void mt753x_trap_frames(struct mt7530_priv *priv) @@ -1015,35 +1182,43 @@ mt753x_trap_frames(struct mt7530_priv *priv) /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them * VLAN-untagged. 
*/ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | - MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | - MT753X_BPDU_PORT_FW_MASK, - MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_BPC, + MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_BPDU_FR | + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | - MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | - MT753X_R01_PORT_FW_MASK, - MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC1, + MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK, + MT753X_R02_BPDU_FR | + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress * them VLAN-untagged. 
*/ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | - MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | - MT753X_R03_PORT_FW_MASK, - MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC2, + MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK, + MT753X_R0E_BPDU_FR | + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static int @@ -1071,6 +1246,13 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) if (priv->id == ID_MT7530 || priv->id == ID_MT7621) mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port)); + /* Add the CPU port to the CPU port bitmap for MT7531. Trapped frames + * will be forwarded to the CPU port that is affine to the inbound user + * port. + */ + if (priv->id == ID_MT7531) + mt7530_set(priv, MT7531_CFC, MT7531_CPU_PMAP(BIT(port))); + /* CPU port gets connected to all user ports of * the switch. */ @@ -2228,8 +2410,6 @@ mt7530_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); - mt7530_pll_setup(priv); - /* Lower Tx driving for TRGMII path */ for (i = 0; i < NUM_TRGMII_CTRL; i++) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), @@ -2247,6 +2427,9 @@ mt7530_setup(struct dsa_switch *ds) priv->p6_interface = PHY_INTERFACE_MODE_NA; + if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ) + mt7530_pll_setup(priv); + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ @@ -2276,6 +2459,9 @@ mt7530_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). 
*/ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Setup VLAN ID 0 for VLAN-unaware bridges */ ret = mt7530_setup_vlan0(priv); if (ret) @@ -2346,16 +2532,8 @@ static int mt7531_setup_common(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; - struct dsa_port *cpu_dp; int ret, i; - /* BPDU to CPU port */ - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, - BIT(cpu_dp->index)); - break; - } - mt753x_trap_frames(priv); /* Enable and reset MIB counters */ @@ -2392,6 +2570,9 @@ mt7531_setup_common(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). */ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Flush the FDB table */ ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); if (ret < 0) @@ -2406,7 +2587,7 @@ mt7531_setup(struct dsa_switch *ds) struct mt7530_priv *priv = ds->priv; struct mt7530_dummy_poll p; u32 val, id; - int ret; + int ret, i; /* Reset whole chip through gpio pin or memory-mapped registers for * different type of hardware @@ -2466,18 +2647,25 @@ mt7531_setup(struct dsa_switch *ds) priv->p5_interface = PHY_INTERFACE_MODE_NA; priv->p6_interface = PHY_INTERFACE_MODE_NA; - /* Enable PHY core PLL, since phy_device has not yet been created - * provided for phy_[read,write]_mmd_indirect is called, we provide - * our own mt7531_ind_mmd_phy_[read,write] to complete this - * function. + /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since + * phy_device has not yet been created provided for + * phy_[read,write]_mmd_indirect is called, we provide our own + * mt7531_ind_mmd_phy_[read,write] to complete this function. 
*/ val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4); - val |= MT7531_PHY_PLL_BYPASS_MODE; + val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE; val &= ~MT7531_PHY_PLL_OFF; mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4, val); + /* Disable EEE advertisement on the switch PHYs. */ + for (i = MT753X_CTRL_PHY_ADDR; + i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) { + mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV, + 0); + } + mt7531_setup_common(ds); /* Setup VLAN ID 0 for VLAN-unaware bridges */ @@ -3215,6 +3403,7 @@ static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, static const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt753x_setup, + .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port, .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, .get_sset_count = mt7530_get_sset_count, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 03598f9ae2..4a013680ce 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -32,6 +32,10 @@ enum mt753x_id { #define SYSC_REG_RSTCTRL 0x34 #define RESET_MCM BIT(2) +/* Register for ARL global control */ +#define MT753X_AGC 0xc +#define LOCAL_EN BIT(7) + /* Registers to mac forward control for unknown frames */ #define MT7530_MFC 0x10 #define BC_FFP(x) (((x) & 0xff) << 24) @@ -54,6 +58,7 @@ enum mt753x_id { #define MT7531_MIRROR_PORT_GET(x) (((x) >> 16) & MIRROR_MASK) #define MT7531_MIRROR_PORT_SET(x) (((x) & MIRROR_MASK) << 16) #define MT7531_CPU_PMAP_MASK GENMASK(7, 0) +#define MT7531_CPU_PMAP(x) FIELD_PREP(MT7531_CPU_PMAP_MASK, x) #define MT753X_MIRROR_REG(id) (((id) == ID_MT7531) ? 
\ MT7531_CFC : MT7530_MFC) @@ -64,6 +69,7 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 +#define MT753X_PAE_BPDU_FR BIT(25) #define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) #define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) @@ -74,20 +80,24 @@ enum mt753x_id { /* Register for :01 and :02 MAC DA frame control */ #define MT753X_RGAC1 0x28 +#define MT753X_R02_BPDU_FR BIT(25) #define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) #define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_BPDU_FR BIT(9) #define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) #define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_BPDU_FR BIT(25) #define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_BPDU_FR BIT(9) #define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) #define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) @@ -663,6 +673,7 @@ enum mt7531_clk_skew { #define RG_SYSPLL_DDSFBK_EN BIT(12) #define RG_SYSPLL_BIAS_EN BIT(11) #define RG_SYSPLL_BIAS_LPF_EN BIT(10) +#define MT7531_RG_SYSPLL_DMY2 BIT(6) #define MT7531_PHY_PLL_OFF BIT(5) #define MT7531_PHY_PLL_BYPASS_MODE BIT(4) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 30fba1ea93..5ddd97f79e 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2585,6 +2585,7 @@ static int mv88e6xxx_software_reset(struct mv88e6xxx_chip *chip) static 
void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip) { struct gpio_desc *gpiod = chip->reset; + int err; /* If there is a GPIO connected to the reset pin, toggle it */ if (gpiod) { @@ -2593,17 +2594,26 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip) * mid-byte, causing the first EEPROM read after the reset * from the wrong location resulting in the switch booting * to wrong mode and inoperable. + * For this reason, switch families with EEPROM support + * generally wait for EEPROM loads to complete as their pre- + * and post-reset handlers. */ - if (chip->info->ops->get_eeprom) - mv88e6xxx_g2_eeprom_wait(chip); + if (chip->info->ops->hardware_reset_pre) { + err = chip->info->ops->hardware_reset_pre(chip); + if (err) + dev_err(chip->dev, "pre-reset error: %d\n", err); + } gpiod_set_value_cansleep(gpiod, 1); usleep_range(10000, 20000); gpiod_set_value_cansleep(gpiod, 0); usleep_range(10000, 20000); - if (chip->info->ops->get_eeprom) - mv88e6xxx_g2_eeprom_wait(chip); + if (chip->info->ops->hardware_reset_post) { + err = chip->info->ops->hardware_reset_post(chip); + if (err) + dev_err(chip->dev, "post-reset error: %d\n", err); + } } } @@ -3824,6 +3834,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4006,6 +4018,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6352_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ 
-4103,6 +4117,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6352_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4201,6 +4217,8 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4262,6 +4280,8 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4321,6 +4341,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4383,6 +4405,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = 
mv88e6352_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4438,6 +4462,8 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .watchdog_ops = &mv88e6250_watchdog_ops, .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6250_g1_wait_eeprom_done_prereset, + .hardware_reset_post = mv88e6xxx_g1_wait_eeprom_done, .reset = mv88e6250_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, @@ -4482,6 +4508,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4543,6 +4571,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, @@ -4586,6 +4616,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, @@ -4634,6 +4666,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + 
.hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4784,6 +4818,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6352_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4848,6 +4884,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4913,6 +4951,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -4981,6 +5021,8 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { .watchdog_ops = &mv88e6393x_watchdog_ops, .mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, + .hardware_reset_pre = mv88e6xxx_g2_eeprom_wait, + .hardware_reset_post = mv88e6xxx_g2_eeprom_wait, .reset = mv88e6352_g1_reset, .rmu_disable = mv88e6390_g1_rmu_disable, .atu_get_hash = mv88e6165_g1_atu_get_hash, @@ -5116,7 +5158,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6141", 
- .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_ports = 6, .num_internal_phys = 5, @@ -5559,7 +5601,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6341", - .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_internal_phys = 5, .num_ports = 6, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 6b7307edaf..6b86e7645b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -434,6 +434,12 @@ struct mv88e6xxx_ops { int (*ppu_enable)(struct mv88e6xxx_chip *chip); int (*ppu_disable)(struct mv88e6xxx_chip *chip); + /* Additional handlers to run before and after hard reset, to make sure + * that the switch and EEPROM are in a good state. + */ + int (*hardware_reset_pre)(struct mv88e6xxx_chip *chip); + int (*hardware_reset_post)(struct mv88e6xxx_chip *chip); + /* Switch Software Reset */ int (*reset)(struct mv88e6xxx_chip *chip); diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index 9649282857..83c6d1fab9 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -75,6 +75,95 @@ static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip) return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1); } +static int mv88e6250_g1_eeprom_reload(struct mv88e6xxx_chip *chip) +{ + /* MV88E6185_G1_CTL1_RELOAD_EEPROM is also valid for 88E6250 */ + int bit = __bf_shf(MV88E6185_G1_CTL1_RELOAD_EEPROM); + u16 val; + int err; + + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &val); + if (err) + return err; + + val |= MV88E6185_G1_CTL1_RELOAD_EEPROM; + + err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, val); + if (err) + return err; + + return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_CTL1, bit, 0); +} + +/* Returns 0 when done, -EBUSY when waiting, other negative codes on error */ 
+static int mv88e6xxx_g1_is_eeprom_done(struct mv88e6xxx_chip *chip) +{ + u16 val; + int err; + + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val); + if (err < 0) { + dev_err(chip->dev, "Error reading status"); + return err; + } + + /* If the switch is still resetting, it may not + * respond on the bus, and so MDIO read returns + * 0xffff. Differentiate between that, and waiting for + * the EEPROM to be done by bit 0 being set. + */ + if (val == 0xffff || !(val & BIT(MV88E6XXX_G1_STS_IRQ_EEPROM_DONE))) + return -EBUSY; + + return 0; +} + +/* As the EEInt (EEPROM done) flag clears on read of the status register, this + * function must be called directly after a hard reset or EEPROM ReLoad request, + * or the done condition may have been missed + */ +int mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip) +{ + const unsigned long timeout = jiffies + 1 * HZ; + int ret; + + /* Wait up to 1 second for the switch to finish reading the + * EEPROM. + */ + while (time_before(jiffies, timeout)) { + ret = mv88e6xxx_g1_is_eeprom_done(chip); + if (ret != -EBUSY) + return ret; + } + + dev_err(chip->dev, "Timeout waiting for EEPROM done"); + return -ETIMEDOUT; +} + +int mv88e6250_g1_wait_eeprom_done_prereset(struct mv88e6xxx_chip *chip) +{ + int ret; + + ret = mv88e6xxx_g1_is_eeprom_done(chip); + if (ret != -EBUSY) + return ret; + + /* Pre-reset, we don't know the state of the switch - when + * mv88e6xxx_g1_is_eeprom_done() returns -EBUSY, that may be because + * the switch is actually busy reading the EEPROM, or because + * MV88E6XXX_G1_STS_IRQ_EEPROM_DONE has been cleared by an unrelated + * status register read already. + * + * To account for the latter case, trigger another EEPROM reload for + * another chance at seeing the done flag.
+ */ + ret = mv88e6250_g1_eeprom_reload(chip); + if (ret) + return ret; + + return mv88e6xxx_g1_wait_eeprom_done(chip); +} + /* Offset 0x01: Switch MAC Address Register Bytes 0 & 1 * Offset 0x02: Switch MAC Address Register Bytes 2 & 3 * Offset 0x03: Switch MAC Address Register Bytes 4 & 5 diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 6f41762eff..92fcebade8 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -280,6 +280,8 @@ int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); int mv88e6185_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6352_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip); +int mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip); +int mv88e6250_g1_wait_eeprom_done_prereset(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip); diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 493192a800..888f10d93b 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -853,11 +853,11 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .hostprio = 7, .mac_fltres1 = SJA1105_LINKLOCAL_FILTER_A, .mac_flt1 = SJA1105_LINKLOCAL_FILTER_A_MASK, - .incl_srcpt1 = false, + .incl_srcpt1 = true, .send_meta1 = false, .mac_fltres0 = SJA1105_LINKLOCAL_FILTER_B, .mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK, - .incl_srcpt0 = false, + .incl_srcpt0 = true, .send_meta0 = false, /* Default to an invalid value */ .mirr_port = priv->ds->num_ports, @@ -2346,11 +2346,6 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, general_params->tpid = tpid; /* EtherType used to identify outer tagged (S-tag) VLAN traffic */ general_params->tpid2 = tpid2; - /* When VLAN filtering is on, we need to at least be able to - 
* decode management traffic through the "backup plan". - */ - general_params->incl_srcpt1 = enabled; - general_params->incl_srcpt0 = enabled; /* VLAN filtering => independent VLAN learning. * No VLAN filtering (or best effort) => shared VLAN learning. diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index f5ec35fa4c..6de0d590be 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -48,6 +48,11 @@ enum ena_admin_aq_feature_id { ENA_ADMIN_FEATURES_OPCODE_NUM = 32, }; +/* device capabilities */ +enum ena_admin_aq_caps_id { + ENA_ADMIN_ENI_STATS = 0, +}; + enum ena_admin_placement_policy_type { /* descriptors and headers are in host memory */ ENA_ADMIN_PLACEMENT_POLICY_HOST = 1, @@ -455,7 +460,10 @@ struct ena_admin_device_attr_feature_desc { */ u32 supported_features; - u32 reserved3; + /* bitmap of ena_admin_aq_caps_id, which represents device + * capabilities. + */ + u32 capabilities; /* Indicates how many bits are used physical address access. 
*/ u32 phys_addr_width; @@ -861,7 +869,9 @@ struct ena_admin_host_info { * 2 : interrupt_moderation * 3 : rx_buf_mirroring * 4 : rss_configurable_function_key - * 31:5 : reserved + * 5 : reserved + * 6 : rx_page_reuse + * 31:7 : reserved */ u32 driver_supported_features; }; @@ -1176,6 +1186,8 @@ struct ena_admin_ena_mmio_req_read_less_resp { #define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK BIT(3) #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_SHIFT 4 #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK BIT(4) +#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_SHIFT 6 +#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK BIT(6) /* aenq_common_desc */ #define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 7979b10192..276f6a8631 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -90,8 +90,7 @@ static int ena_com_admin_init_sq(struct ena_com_admin_queue *admin_queue) struct ena_com_admin_sq *sq = &admin_queue->sq; u16 size = ADMIN_SQ_SIZE(admin_queue->q_depth); - sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, - &sq->dma_addr, GFP_KERNEL); + sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, &sq->dma_addr, GFP_KERNEL); if (!sq->entries) { netdev_err(ena_dev->net_device, "Memory allocation failed\n"); @@ -113,8 +112,7 @@ static int ena_com_admin_init_cq(struct ena_com_admin_queue *admin_queue) struct ena_com_admin_cq *cq = &admin_queue->cq; u16 size = ADMIN_CQ_SIZE(admin_queue->q_depth); - cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, - &cq->dma_addr, GFP_KERNEL); + cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, &cq->dma_addr, GFP_KERNEL); if (!cq->entries) { netdev_err(ena_dev->net_device, "Memory allocation failed\n"); @@ -136,8 +134,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev, ena_dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH; 
size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH); - aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size, - &aenq->dma_addr, GFP_KERNEL); + aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size, &aenq->dma_addr, GFP_KERNEL); if (!aenq->entries) { netdev_err(ena_dev->net_device, "Memory allocation failed\n"); @@ -155,14 +152,13 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev, aenq_caps = 0; aenq_caps |= ena_dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; - aenq_caps |= (sizeof(struct ena_admin_aenq_entry) - << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & - ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + aenq_caps |= + (sizeof(struct ena_admin_aenq_entry) << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; writel(aenq_caps, ena_dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); if (unlikely(!aenq_handlers)) { - netdev_err(ena_dev->net_device, - "AENQ handlers pointer is NULL\n"); + netdev_err(ena_dev->net_device, "AENQ handlers pointer is NULL\n"); return -EINVAL; } @@ -189,14 +185,12 @@ static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *admin_queu } if (unlikely(!admin_queue->comp_ctx)) { - netdev_err(admin_queue->ena_dev->net_device, - "Completion context is NULL\n"); + netdev_err(admin_queue->ena_dev->net_device, "Completion context is NULL\n"); return NULL; } if (unlikely(admin_queue->comp_ctx[command_id].occupied && capture)) { - netdev_err(admin_queue->ena_dev->net_device, - "Completion context is occupied\n"); + netdev_err(admin_queue->ena_dev->net_device, "Completion context is occupied\n"); return NULL; } @@ -226,8 +220,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu /* In case of queue FULL */ cnt = (u16)atomic_read(&admin_queue->outstanding_cmds); if (cnt >= admin_queue->q_depth) { - netdev_dbg(admin_queue->ena_dev->net_device, - "Admin queue is full.\n"); + netdev_dbg(admin_queue->ena_dev->net_device, "Admin queue is full.\n"); 
admin_queue->stats.out_of_space++; return ERR_PTR(-ENOSPC); } @@ -274,8 +267,7 @@ static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *admin_queue) struct ena_comp_ctx *comp_ctx; u16 i; - admin_queue->comp_ctx = - devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL); + admin_queue->comp_ctx = devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL); if (unlikely(!admin_queue->comp_ctx)) { netdev_err(ena_dev->net_device, "Memory allocation failed\n"); return -ENOMEM; @@ -320,7 +312,6 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, struct ena_com_io_sq *io_sq) { size_t size; - int dev_node = 0; memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr)); @@ -333,23 +324,17 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, size = io_sq->desc_entry_size * io_sq->q_depth; if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { - dev_node = dev_to_node(ena_dev->dmadev); - set_dev_node(ena_dev->dmadev, ctx->numa_node); io_sq->desc_addr.virt_addr = - dma_alloc_coherent(ena_dev->dmadev, size, - &io_sq->desc_addr.phys_addr, + dma_alloc_coherent(ena_dev->dmadev, size, &io_sq->desc_addr.phys_addr, GFP_KERNEL); - set_dev_node(ena_dev->dmadev, dev_node); if (!io_sq->desc_addr.virt_addr) { io_sq->desc_addr.virt_addr = dma_alloc_coherent(ena_dev->dmadev, size, - &io_sq->desc_addr.phys_addr, - GFP_KERNEL); + &io_sq->desc_addr.phys_addr, GFP_KERNEL); } if (!io_sq->desc_addr.virt_addr) { - netdev_err(ena_dev->net_device, - "Memory allocation failed\n"); + netdev_err(ena_dev->net_device, "Memory allocation failed\n"); return -ENOMEM; } } @@ -362,21 +347,16 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, ENA_COM_BOUNCE_BUFFER_CNTRL_CNT; io_sq->bounce_buf_ctrl.next_to_use = 0; - size = io_sq->bounce_buf_ctrl.buffer_size * + size = (size_t)io_sq->bounce_buf_ctrl.buffer_size * io_sq->bounce_buf_ctrl.buffers_num; - dev_node = dev_to_node(ena_dev->dmadev); - set_dev_node(ena_dev->dmadev, ctx->numa_node); - 
io_sq->bounce_buf_ctrl.base_buffer = - devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); - set_dev_node(ena_dev->dmadev, dev_node); + io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); if (!io_sq->bounce_buf_ctrl.base_buffer) io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); if (!io_sq->bounce_buf_ctrl.base_buffer) { - netdev_err(ena_dev->net_device, - "Bounce buffer memory allocation failed\n"); + netdev_err(ena_dev->net_device, "Bounce buffer memory allocation failed\n"); return -ENOMEM; } @@ -410,7 +390,6 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, struct ena_com_io_cq *io_cq) { size_t size; - int prev_node = 0; memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr)); @@ -422,16 +401,11 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; - prev_node = dev_to_node(ena_dev->dmadev); - set_dev_node(ena_dev->dmadev, ctx->numa_node); io_cq->cdesc_addr.virt_addr = - dma_alloc_coherent(ena_dev->dmadev, size, - &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); - set_dev_node(ena_dev->dmadev, prev_node); + dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); if (!io_cq->cdesc_addr.virt_addr) { io_cq->cdesc_addr.virt_addr = - dma_alloc_coherent(ena_dev->dmadev, size, - &io_cq->cdesc_addr.phys_addr, + dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); } @@ -514,8 +488,8 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue, u8 comp_status) { if (unlikely(comp_status != 0)) - netdev_err(admin_queue->ena_dev->net_device, - "Admin command failed[%u]\n", comp_status); + netdev_err(admin_queue->ena_dev->net_device, "Admin command failed[%u]\n", + comp_status); switch (comp_status) { case ENA_ADMIN_SUCCESS: @@ -580,8 +554,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c } if 
(unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { - netdev_err(admin_queue->ena_dev->net_device, - "Command was aborted\n"); + netdev_err(admin_queue->ena_dev->net_device, "Command was aborted\n"); spin_lock_irqsave(&admin_queue->q_lock, flags); admin_queue->stats.aborted_cmd++; spin_unlock_irqrestore(&admin_queue->q_lock, flags); @@ -589,8 +562,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c goto err; } - WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", - comp_ctx->status); + WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", comp_ctx->status); ret = ena_com_comp_status_to_errno(admin_queue, comp_ctx->comp_status); err: @@ -634,8 +606,7 @@ static int ena_com_set_llq(struct ena_com_dev *ena_dev) sizeof(resp)); if (unlikely(ret)) - netdev_err(ena_dev->net_device, - "Failed to set LLQ configurations: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to set LLQ configurations: %d\n", ret); return ret; } @@ -658,8 +629,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, llq_default_cfg->llq_header_location; } else { netdev_err(ena_dev->net_device, - "Invalid header location control, supported: 0x%x\n", - supported_feat); + "Invalid header location control, supported: 0x%x\n", supported_feat); return -EINVAL; } @@ -681,8 +651,8 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, netdev_err(ena_dev->net_device, "Default llq stride ctrl is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n", - llq_default_cfg->llq_stride_ctrl, - supported_feat, llq_info->desc_stride_ctrl); + llq_default_cfg->llq_stride_ctrl, supported_feat, + llq_info->desc_stride_ctrl); } } else { llq_info->desc_stride_ctrl = 0; @@ -704,8 +674,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, llq_info->desc_list_entry_size = 256; } else { netdev_err(ena_dev->net_device, - "Invalid entry_size_ctrl, supported: 0x%x\n", - supported_feat); + 
"Invalid entry_size_ctrl, supported: 0x%x\n", supported_feat); return -EINVAL; } @@ -750,8 +719,8 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, netdev_err(ena_dev->net_device, "Default llq num descs before header is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n", - llq_default_cfg->llq_num_decs_before_header, - supported_feat, llq_info->descs_num_before_header); + llq_default_cfg->llq_num_decs_before_header, supported_feat, + llq_info->descs_num_before_header); } /* Check for accelerated queue supported */ llq_accel_mode_get = llq_features->accel_mode.u.get; @@ -767,8 +736,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, rc = ena_com_set_llq(ena_dev); if (rc) - netdev_err(ena_dev->net_device, - "Cannot set LLQ configuration: %d\n", rc); + netdev_err(ena_dev->net_device, "Cannot set LLQ configuration: %d\n", rc); return rc; } @@ -780,8 +748,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com int ret; wait_for_completion_timeout(&comp_ctx->wait_event, - usecs_to_jiffies( - admin_queue->completion_timeout)); + usecs_to_jiffies(admin_queue->completion_timeout)); /* In case the command wasn't completed find out the root cause. * There might be 2 kinds of errors @@ -797,8 +764,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com if (comp_ctx->status == ENA_CMD_COMPLETED) { netdev_err(admin_queue->ena_dev->net_device, "The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n", - comp_ctx->cmd_opcode, - admin_queue->auto_polling ? "ON" : "OFF"); + comp_ctx->cmd_opcode, admin_queue->auto_polling ? 
"ON" : "OFF"); /* Check if fallback to polling is enabled */ if (admin_queue->auto_polling) admin_queue->polling = true; @@ -867,15 +833,13 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) if (unlikely(i == timeout)) { netdev_err(ena_dev->net_device, "Reading reg failed for timeout. expected: req id[%u] offset[%u] actual: req id[%u] offset[%u]\n", - mmio_read->seq_num, offset, read_resp->req_id, - read_resp->reg_off); + mmio_read->seq_num, offset, read_resp->req_id, read_resp->reg_off); ret = ENA_MMIO_READ_TIMEOUT; goto err; } if (read_resp->reg_off != offset) { - netdev_err(ena_dev->net_device, - "Read failure: wrong offset provided\n"); + netdev_err(ena_dev->net_device, "Read failure: wrong offset provided\n"); ret = ENA_MMIO_READ_TIMEOUT; } else { ret = read_resp->reg_val; @@ -934,8 +898,7 @@ static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, sizeof(destroy_resp)); if (unlikely(ret && (ret != -ENODEV))) - netdev_err(ena_dev->net_device, - "Failed to destroy io sq error: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to destroy io sq error: %d\n", ret); return ret; } @@ -949,8 +912,7 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, if (io_cq->cdesc_addr.virt_addr) { size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; - dma_free_coherent(ena_dev->dmadev, size, - io_cq->cdesc_addr.virt_addr, + dma_free_coherent(ena_dev->dmadev, size, io_cq->cdesc_addr.virt_addr, io_cq->cdesc_addr.phys_addr); io_cq->cdesc_addr.virt_addr = NULL; @@ -959,8 +921,7 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, if (io_sq->desc_addr.virt_addr) { size = io_sq->desc_entry_size * io_sq->q_depth; - dma_free_coherent(ena_dev->dmadev, size, - io_sq->desc_addr.virt_addr, + dma_free_coherent(ena_dev->dmadev, size, io_sq->desc_addr.virt_addr, io_sq->desc_addr.phys_addr); io_sq->desc_addr.virt_addr = NULL; @@ -985,8 +946,7 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, val = 
ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) { - netdev_err(ena_dev->net_device, - "Reg read timeout occurred\n"); + netdev_err(ena_dev->net_device, "Reg read timeout occurred\n"); return -ETIME; } @@ -1026,8 +986,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, int ret; if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) { - netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", - feature_id); + netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", feature_id); return -EOPNOTSUPP; } @@ -1064,8 +1023,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, if (unlikely(ret)) netdev_err(ena_dev->net_device, - "Failed to submit get_feature command %d error: %d\n", - feature_id, ret); + "Failed to submit get_feature command %d error: %d\n", feature_id, ret); return ret; } @@ -1104,13 +1062,11 @@ static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; - if (!ena_com_check_supported_feature_id(ena_dev, - ENA_ADMIN_RSS_HASH_FUNCTION)) + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_FUNCTION)) return -EOPNOTSUPP; - rss->hash_key = - dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), - &rss->hash_key_dma_addr, GFP_KERNEL); + rss->hash_key = dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + &rss->hash_key_dma_addr, GFP_KERNEL); if (unlikely(!rss->hash_key)) return -ENOMEM; @@ -1123,8 +1079,8 @@ static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev) struct ena_rss *rss = &ena_dev->rss; if (rss->hash_key) - dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), - rss->hash_key, rss->hash_key_dma_addr); + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), rss->hash_key, + rss->hash_key_dma_addr); rss->hash_key = NULL; } @@ -1132,9 +1088,8 @@ static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; - 
rss->hash_ctrl = - dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), - &rss->hash_ctrl_dma_addr, GFP_KERNEL); + rss->hash_ctrl = dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + &rss->hash_ctrl_dma_addr, GFP_KERNEL); if (unlikely(!rss->hash_ctrl)) return -ENOMEM; @@ -1147,8 +1102,8 @@ static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev) struct ena_rss *rss = &ena_dev->rss; if (rss->hash_ctrl) - dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), - rss->hash_ctrl, rss->hash_ctrl_dma_addr); + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), rss->hash_ctrl, + rss->hash_ctrl_dma_addr); rss->hash_ctrl = NULL; } @@ -1177,15 +1132,13 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, tbl_size = (1ULL << log_size) * sizeof(struct ena_admin_rss_ind_table_entry); - rss->rss_ind_tbl = - dma_alloc_coherent(ena_dev->dmadev, tbl_size, - &rss->rss_ind_tbl_dma_addr, GFP_KERNEL); + rss->rss_ind_tbl = dma_alloc_coherent(ena_dev->dmadev, tbl_size, &rss->rss_ind_tbl_dma_addr, + GFP_KERNEL); if (unlikely(!rss->rss_ind_tbl)) goto mem_err1; tbl_size = (1ULL << log_size) * sizeof(u16); - rss->host_rss_ind_tbl = - devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); + rss->host_rss_ind_tbl = devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); if (unlikely(!rss->host_rss_ind_tbl)) goto mem_err2; @@ -1197,8 +1150,7 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, tbl_size = (1ULL << log_size) * sizeof(struct ena_admin_rss_ind_table_entry); - dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, - rss->rss_ind_tbl_dma_addr); + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, rss->rss_ind_tbl_dma_addr); rss->rss_ind_tbl = NULL; mem_err1: rss->tbl_log_size = 0; @@ -1261,8 +1213,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, &create_cmd.sq_ba, io_sq->desc_addr.phys_addr); if (unlikely(ret)) { - netdev_err(ena_dev->net_device, - "Memory address set 
failed\n"); + netdev_err(ena_dev->net_device, "Memory address set failed\n"); return ret; } } @@ -1273,8 +1224,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, (struct ena_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (unlikely(ret)) { - netdev_err(ena_dev->net_device, - "Failed to create IO SQ. error: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to create IO SQ. error: %d\n", ret); return ret; } @@ -1292,8 +1242,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, cmd_completion.llq_descriptors_offset); } - netdev_dbg(ena_dev->net_device, "Created sq[%u], depth[%u]\n", - io_sq->idx, io_sq->q_depth); + netdev_dbg(ena_dev->net_device, "Created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); return ret; } @@ -1420,8 +1369,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev, (struct ena_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (unlikely(ret)) { - netdev_err(ena_dev->net_device, - "Failed to create IO CQ. error: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to create IO CQ. 
error: %d\n", ret); return ret; } @@ -1440,8 +1388,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev, (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + cmd_completion.numa_node_register_offset); - netdev_dbg(ena_dev->net_device, "Created cq[%u], depth[%u]\n", - io_cq->idx, io_cq->q_depth); + netdev_dbg(ena_dev->net_device, "Created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); return ret; } @@ -1451,8 +1398,7 @@ int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, struct ena_com_io_cq **io_cq) { if (qid >= ENA_TOTAL_NUM_QUEUES) { - netdev_err(ena_dev->net_device, - "Invalid queue number %d but the max is %d\n", qid, + netdev_err(ena_dev->net_device, "Invalid queue number %d but the max is %d\n", qid, ENA_TOTAL_NUM_QUEUES); return -EINVAL; } @@ -1492,8 +1438,7 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) spin_lock_irqsave(&admin_queue->q_lock, flags); while (atomic_read(&admin_queue->outstanding_cmds) != 0) { spin_unlock_irqrestore(&admin_queue->q_lock, flags); - ena_delay_exponential_backoff_us(exp++, - ena_dev->ena_min_poll_delay_us); + ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us); spin_lock_irqsave(&admin_queue->q_lock, flags); } spin_unlock_irqrestore(&admin_queue->q_lock, flags); @@ -1519,8 +1464,7 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, sizeof(destroy_resp)); if (unlikely(ret && (ret != -ENODEV))) - netdev_err(ena_dev->net_device, - "Failed to destroy IO CQ. error: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to destroy IO CQ. 
error: %d\n", ret); return ret; } @@ -1588,8 +1532,7 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) sizeof(resp)); if (unlikely(ret)) - netdev_err(ena_dev->net_device, - "Failed to config AENQ ret: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to config AENQ ret: %d\n", ret); return ret; } @@ -1610,8 +1553,7 @@ int ena_com_get_dma_width(struct ena_com_dev *ena_dev) netdev_dbg(ena_dev->net_device, "ENA dma width: %d\n", width); if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) { - netdev_err(ena_dev->net_device, "DMA width illegal value: %d\n", - width); + netdev_err(ena_dev->net_device, "DMA width illegal value: %d\n", width); return -EINVAL; } @@ -1633,19 +1575,16 @@ int ena_com_validate_version(struct ena_com_dev *ena_dev) ctrl_ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CONTROLLER_VERSION_OFF); - if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || - (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { + if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { netdev_err(ena_dev->net_device, "Reg read timeout occurred\n"); return -ETIME; } dev_info(ena_dev->dmadev, "ENA device version: %d.%d\n", - (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> - ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, + (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); - dev_info(ena_dev->dmadev, - "ENA controller version: %d.%d.%d implementation version %d\n", + dev_info(ena_dev->dmadev, "ENA controller version: %d.%d.%d implementation version %d\n", (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> @@ -1694,20 +1633,17 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev) size = ADMIN_SQ_SIZE(admin_queue->q_depth); if (sq->entries) - dma_free_coherent(ena_dev->dmadev, size, sq->entries, - sq->dma_addr); + 
dma_free_coherent(ena_dev->dmadev, size, sq->entries, sq->dma_addr); sq->entries = NULL; size = ADMIN_CQ_SIZE(admin_queue->q_depth); if (cq->entries) - dma_free_coherent(ena_dev->dmadev, size, cq->entries, - cq->dma_addr); + dma_free_coherent(ena_dev->dmadev, size, cq->entries, cq->dma_addr); cq->entries = NULL; size = ADMIN_AENQ_SIZE(aenq->q_depth); if (ena_dev->aenq.entries) - dma_free_coherent(ena_dev->dmadev, size, aenq->entries, - aenq->dma_addr); + dma_free_coherent(ena_dev->dmadev, size, aenq->entries, aenq->dma_addr); aenq->entries = NULL; } @@ -1733,10 +1669,8 @@ int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; spin_lock_init(&mmio_read->lock); - mmio_read->read_resp = - dma_alloc_coherent(ena_dev->dmadev, - sizeof(*mmio_read->read_resp), - &mmio_read->read_resp_dma_addr, GFP_KERNEL); + mmio_read->read_resp = dma_alloc_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); if (unlikely(!mmio_read->read_resp)) goto err; @@ -1767,8 +1701,8 @@ void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev) writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); - dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), - mmio_read->read_resp, mmio_read->read_resp_dma_addr); + dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), mmio_read->read_resp, + mmio_read->read_resp_dma_addr); mmio_read->read_resp = NULL; } @@ -1800,8 +1734,7 @@ int ena_com_admin_init(struct ena_com_dev *ena_dev, } if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) { - netdev_err(ena_dev->net_device, - "Device isn't ready, abort com init\n"); + netdev_err(ena_dev->net_device, "Device isn't ready, abort com init\n"); return -ENODEV; } @@ -1878,8 +1811,7 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev, int ret; if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) { - 
netdev_err(ena_dev->net_device, - "Qid (%d) is bigger than max num of queues (%d)\n", + netdev_err(ena_dev->net_device, "Qid (%d) is bigger than max num of queues (%d)\n", ctx->qid, ENA_TOTAL_NUM_QUEUES); return -EINVAL; } @@ -1905,8 +1837,7 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev, if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) /* header length is limited to 8 bits */ - io_sq->tx_max_header_size = - min_t(u32, ena_dev->tx_max_header_size, SZ_256); + io_sq->tx_max_header_size = min_t(u32, ena_dev->tx_max_header_size, SZ_256); ret = ena_com_init_io_sq(ena_dev, ctx, io_sq); if (ret) @@ -1938,8 +1869,7 @@ void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) struct ena_com_io_cq *io_cq; if (qid >= ENA_TOTAL_NUM_QUEUES) { - netdev_err(ena_dev->net_device, - "Qid (%d) is bigger than max num of queues (%d)\n", + netdev_err(ena_dev->net_device, "Qid (%d) is bigger than max num of queues (%d)\n", qid, ENA_TOTAL_NUM_QUEUES); return; } @@ -1974,6 +1904,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, sizeof(get_resp.u.dev_attr)); ena_dev->supported_features = get_resp.u.dev_attr.supported_features; + ena_dev->capabilities = get_resp.u.dev_attr.capabilities; if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { rc = ena_com_get_feature(ena_dev, &get_resp, @@ -1982,8 +1913,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, if (rc) return rc; - if (get_resp.u.max_queue_ext.version != - ENA_FEATURE_MAX_QUEUE_EXT_VER) + if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER) return -EINVAL; memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext, @@ -2024,18 +1954,15 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0); if (!rc) - memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, - sizeof(get_resp.u.hw_hints)); + memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, sizeof(get_resp.u.hw_hints)); else 
if (rc == -EOPNOTSUPP) - memset(&get_feat_ctx->hw_hints, 0x0, - sizeof(get_feat_ctx->hw_hints)); + memset(&get_feat_ctx->hw_hints, 0x0, sizeof(get_feat_ctx->hw_hints)); else return rc; rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0); if (!rc) - memcpy(&get_feat_ctx->llq, &get_resp.u.llq, - sizeof(get_resp.u.llq)); + memcpy(&get_feat_ctx->llq, &get_resp.u.llq, sizeof(get_resp.u.llq)); else if (rc == -EOPNOTSUPP) memset(&get_feat_ctx->llq, 0x0, sizeof(get_feat_ctx->llq)); else @@ -2083,8 +2010,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data) aenq_common = &aenq_e->aenq_common_desc; /* Go over all the events */ - while ((READ_ONCE(aenq_common->flags) & - ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { + while ((READ_ONCE(aenq_common->flags) & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { /* Make sure the phase bit (ownership) is as expected before * reading the rest of the descriptor. */ @@ -2093,8 +2019,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data) timestamp = (u64)aenq_common->timestamp_low | ((u64)aenq_common->timestamp_high << 32); - netdev_dbg(ena_dev->net_device, - "AENQ! Group[%x] Syndrome[%x] timestamp: [%llus]\n", + netdev_dbg(ena_dev->net_device, "AENQ! 
Group[%x] Syndrome[%x] timestamp: [%llus]\n", aenq_common->group, aenq_common->syndrome, timestamp); /* Handle specific event*/ @@ -2123,8 +2048,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data) /* write the aenq doorbell after all AENQ descriptors were read */ mb(); - writel_relaxed((u32)aenq->head, - ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); + writel_relaxed((u32)aenq->head, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); } int ena_com_dev_reset(struct ena_com_dev *ena_dev, @@ -2136,15 +2060,13 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev, stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); - if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || - (cap == ENA_MMIO_READ_TIMEOUT))) { + if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || (cap == ENA_MMIO_READ_TIMEOUT))) { netdev_err(ena_dev->net_device, "Reg read32 timeout occurred\n"); return -ETIME; } if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) { - netdev_err(ena_dev->net_device, - "Device isn't ready, can't reset device\n"); + netdev_err(ena_dev->net_device, "Device isn't ready, can't reset device\n"); return -EINVAL; } @@ -2167,8 +2089,7 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev, rc = wait_for_reset_state(ena_dev, timeout, ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); if (rc != 0) { - netdev_err(ena_dev->net_device, - "Reset indication didn't turn on\n"); + netdev_err(ena_dev->net_device, "Reset indication didn't turn on\n"); return rc; } @@ -2176,8 +2097,7 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev, writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); rc = wait_for_reset_state(ena_dev, timeout, 0); if (rc != 0) { - netdev_err(ena_dev->net_device, - "Reset indication didn't turn off\n"); + netdev_err(ena_dev->net_device, "Reset indication didn't turn off\n"); return rc; } @@ -2214,8 +2134,7 @@ static int ena_get_dev_stats(struct ena_com_dev *ena_dev, sizeof(*get_resp)); if (unlikely(ret)) - 
netdev_err(ena_dev->net_device, - "Failed to get stats. error: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to get stats. error: %d\n", ret); return ret; } @@ -2226,6 +2145,12 @@ int ena_com_get_eni_stats(struct ena_com_dev *ena_dev, struct ena_com_stats_ctx ctx; int ret; + if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { + netdev_err(ena_dev->net_device, "Capability %d isn't supported\n", + ENA_ADMIN_ENI_STATS); + return -EOPNOTSUPP; + } + memset(&ctx, 0x0, sizeof(ctx)); ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_ENI); if (likely(ret == 0)) @@ -2258,8 +2183,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu) int ret; if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) { - netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", - ENA_ADMIN_MTU); + netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", ENA_ADMIN_MTU); return -EOPNOTSUPP; } @@ -2278,8 +2202,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu) sizeof(resp)); if (unlikely(ret)) - netdev_err(ena_dev->net_device, - "Failed to set mtu %d. error: %d\n", mtu, ret); + netdev_err(ena_dev->net_device, "Failed to set mtu %d. 
error: %d\n", mtu, ret); return ret; } @@ -2293,8 +2216,7 @@ int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, ret = ena_com_get_feature(ena_dev, &resp, ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); if (unlikely(ret)) { - netdev_err(ena_dev->net_device, - "Failed to get offload capabilities %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to get offload capabilities %d\n", ret); return ret; } @@ -2312,8 +2234,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) struct ena_admin_get_feat_resp get_resp; int ret; - if (!ena_com_check_supported_feature_id(ena_dev, - ENA_ADMIN_RSS_HASH_FUNCTION)) { + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_FUNCTION)) { netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_FUNCTION); return -EOPNOTSUPP; @@ -2326,8 +2247,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) return ret; if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) { - netdev_err(ena_dev->net_device, - "Func hash %d isn't supported by device, abort\n", + netdev_err(ena_dev->net_device, "Func hash %d isn't supported by device, abort\n", rss->hash_func); return -EOPNOTSUPP; } @@ -2357,8 +2277,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) (struct ena_admin_acq_entry *)&resp, sizeof(resp)); if (unlikely(ret)) { - netdev_err(ena_dev->net_device, - "Failed to set hash function %d. error: %d\n", + netdev_err(ena_dev->net_device, "Failed to set hash function %d. 
error: %d\n", rss->hash_func, ret); return -EINVAL; } @@ -2390,16 +2309,15 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, return rc; if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) { - netdev_err(ena_dev->net_device, - "Flow hash function %d isn't supported\n", func); + netdev_err(ena_dev->net_device, "Flow hash function %d isn't supported\n", func); return -EOPNOTSUPP; } if ((func == ENA_ADMIN_TOEPLITZ) && key) { if (key_len != sizeof(hash_key->key)) { netdev_err(ena_dev->net_device, - "key len (%u) doesn't equal the supported size (%zu)\n", - key_len, sizeof(hash_key->key)); + "key len (%u) doesn't equal the supported size (%zu)\n", key_len, + sizeof(hash_key->key)); return -EINVAL; } memcpy(hash_key->key, key, key_len); @@ -2487,8 +2405,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) struct ena_admin_set_feat_resp resp; int ret; - if (!ena_com_check_supported_feature_id(ena_dev, - ENA_ADMIN_RSS_HASH_INPUT)) { + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_INPUT)) { netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT); return -EOPNOTSUPP; @@ -2519,8 +2436,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) (struct ena_admin_acq_entry *)&resp, sizeof(resp)); if (unlikely(ret)) - netdev_err(ena_dev->net_device, - "Failed to set hash input. error: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to set hash input. 
error: %d\n", ret); return ret; } @@ -2597,8 +2513,7 @@ int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, int rc; if (proto >= ENA_ADMIN_RSS_PROTO_NUM) { - netdev_err(ena_dev->net_device, "Invalid proto num (%u)\n", - proto); + netdev_err(ena_dev->net_device, "Invalid proto num (%u)\n", proto); return -EINVAL; } @@ -2650,8 +2565,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) struct ena_admin_set_feat_resp resp; int ret; - if (!ena_com_check_supported_feature_id( - ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) { + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) { netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG); return -EOPNOTSUPP; @@ -2691,8 +2605,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) sizeof(resp)); if (unlikely(ret)) - netdev_err(ena_dev->net_device, - "Failed to set indirect table. error: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to set indirect table. 
error: %d\n", ret); return ret; } @@ -2771,9 +2684,8 @@ int ena_com_allocate_host_info(struct ena_com_dev *ena_dev) { struct ena_host_attribute *host_attr = &ena_dev->host_attr; - host_attr->host_info = - dma_alloc_coherent(ena_dev->dmadev, SZ_4K, - &host_attr->host_info_dma_addr, GFP_KERNEL); + host_attr->host_info = dma_alloc_coherent(ena_dev->dmadev, SZ_4K, + &host_attr->host_info_dma_addr, GFP_KERNEL); if (unlikely(!host_attr->host_info)) return -ENOMEM; @@ -2819,8 +2731,7 @@ void ena_com_delete_debug_area(struct ena_com_dev *ena_dev) if (host_attr->debug_area_virt_addr) { dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size, - host_attr->debug_area_virt_addr, - host_attr->debug_area_dma_addr); + host_attr->debug_area_virt_addr, host_attr->debug_area_dma_addr); host_attr->debug_area_virt_addr = NULL; } } @@ -2869,8 +2780,7 @@ int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) sizeof(resp)); if (unlikely(ret)) - netdev_err(ena_dev->net_device, - "Failed to set host attributes: %d\n", ret); + netdev_err(ena_dev->net_device, "Failed to set host attributes: %d\n", ret); return ret; } @@ -2888,8 +2798,7 @@ static int ena_com_update_nonadaptive_moderation_interval(struct ena_com_dev *en u32 *intr_moder_interval) { if (!intr_delay_resolution) { - netdev_err(ena_dev->net_device, - "Illegal interrupt delay granularity value\n"); + netdev_err(ena_dev->net_device, "Illegal interrupt delay granularity value\n"); return -EFAULT; } @@ -2927,14 +2836,12 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) if (rc) { if (rc == -EOPNOTSUPP) { - netdev_dbg(ena_dev->net_device, - "Feature %d isn't supported\n", + netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", ENA_ADMIN_INTERRUPT_MODERATION); rc = 0; } else { netdev_err(ena_dev->net_device, - "Failed to get interrupt moderation admin cmd. rc: %d\n", - rc); + "Failed to get interrupt moderation admin cmd. 
rc: %d\n", rc); } /* no moderation supported, disable adaptive support */ @@ -2982,8 +2889,7 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc)); if (unlikely(ena_dev->tx_max_header_size == 0)) { - netdev_err(ena_dev->net_device, - "The size of the LLQ entry is smaller than needed\n"); + netdev_err(ena_dev->net_device, "The size of the LLQ entry is smaller than needed\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 73b03ce594..3c5081d9d2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -314,6 +314,7 @@ struct ena_com_dev { struct ena_rss rss; u32 supported_features; + u32 capabilities; u32 dma_addr_bits; struct ena_host_attribute host_attr; @@ -967,6 +968,18 @@ static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_d ena_dev->adaptive_coalescing = false; } +/* ena_com_get_cap - query whether device supports a capability. + * @ena_dev: ENA communication layer struct + * @cap_id: enum value representing the capability + * + * @return - true if capability is supported or false otherwise + */ +static inline bool ena_com_get_cap(struct ena_com_dev *ena_dev, + enum ena_admin_aq_caps_id cap_id) +{ + return !!(ena_dev->capabilities & BIT(cap_id)); +} + /* ena_com_update_intr_reg - Prepare interrupt register * @intr_reg: interrupt register to update. 
* @rx_delay_interval: Rx interval in usecs diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index f9f886289b..933e619b3a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -18,8 +18,7 @@ static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr + (head_masked * io_cq->cdesc_entry_size_in_bytes)); - desc_phase = (READ_ONCE(cdesc->status) & - ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> + desc_phase = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; if (desc_phase != expected_phase) @@ -65,8 +64,8 @@ static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, io_sq->entries_in_tx_burst_left--; netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "Decreasing entries_in_tx_burst_left of queue %d to %d\n", - io_sq->qid, io_sq->entries_in_tx_burst_left); + "Decreasing entries_in_tx_burst_left of queue %d to %d\n", io_sq->qid, + io_sq->entries_in_tx_burst_left); } /* Make sure everything was written into the bounce buffer before @@ -75,8 +74,8 @@ static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, wmb(); /* The line is completed. 
Copy it to dev */ - __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset, - bounce_buffer, (llq_info->desc_list_entry_size) / 8); + __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset, bounce_buffer, + (llq_info->desc_list_entry_size) / 8); io_sq->tail++; @@ -102,16 +101,14 @@ static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, header_offset = llq_info->descs_num_before_header * io_sq->desc_entry_size; - if (unlikely((header_offset + header_len) > - llq_info->desc_list_entry_size)) { + if (unlikely((header_offset + header_len) > llq_info->desc_list_entry_size)) { netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Trying to write header larger than llq entry can accommodate\n"); return -EFAULT; } if (unlikely(!bounce_buffer)) { - netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "Bounce buffer is NULL\n"); + netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Bounce buffer is NULL\n"); return -EFAULT; } @@ -129,8 +126,7 @@ static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) bounce_buffer = pkt_ctrl->curr_bounce_buf; if (unlikely(!bounce_buffer)) { - netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "Bounce buffer is NULL\n"); + netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Bounce buffer is NULL\n"); return NULL; } @@ -247,8 +243,7 @@ static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, ena_com_cq_inc_head(io_cq); count++; - last = (READ_ONCE(cdesc->status) & - ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; } while (!last); @@ -369,9 +364,8 @@ static void ena_com_rx_set_flags(struct ena_com_io_cq *io_cq, netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device, "l3_proto %d l4_proto %d l3_csum_err %d l4_csum_err %d hash %d frag %d cdesc_status %x\n", - ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, - ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err, - ena_rx_ctx->hash, 
ena_rx_ctx->frag, cdesc->status); + ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, ena_rx_ctx->l3_csum_err, + ena_rx_ctx->l4_csum_err, ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); } /*****************************************************************************/ @@ -403,13 +397,12 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, if (unlikely(header_len > io_sq->tx_max_header_size)) { netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "Header size is too large %d max header: %d\n", - header_len, io_sq->tx_max_header_size); + "Header size is too large %d max header: %d\n", header_len, + io_sq->tx_max_header_size); return -EINVAL; } - if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && - !buffer_to_push)) { + if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && !buffer_to_push)) { netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Push header wasn't provided in LLQ mode\n"); return -EINVAL; @@ -556,13 +549,11 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, } netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device, - "Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, - nb_hw_desc); + "Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, nb_hw_desc); if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) { netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device, - "Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, - ena_rx_ctx->max_bufs); + "Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, ena_rx_ctx->max_bufs); return -ENOSPC; } @@ -586,8 +577,8 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, io_sq->next_to_comp += nb_hw_desc; netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device, - "[%s][QID#%d] Updating SQ head to: %d\n", __func__, - io_sq->qid, io_sq->next_to_comp); + "[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid, + io_sq->next_to_comp); /* Get rx flags from the last pkt */ ena_com_rx_set_flags(io_cq, ena_rx_ctx, cdesc); @@ -624,8 +615,8 @@ int 
ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, desc->req_id = req_id; netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "[%s] Adding single RX desc, Queue: %u, req_id: %u\n", - __func__, io_sq->qid, req_id); + "[%s] Adding single RX desc, Queue: %u, req_id: %u\n", __func__, io_sq->qid, + req_id); desc->buff_addr_lo = (u32)ena_buf->paddr; desc->buff_addr_hi = diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 689313ee25..07029eee78 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -141,8 +141,8 @@ static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq, } netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "Queue: %d num_descs: %d num_entries_needed: %d\n", - io_sq->qid, num_descs, num_entries_needed); + "Queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, num_descs, + num_entries_needed); return num_entries_needed > io_sq->entries_in_tx_burst_left; } @@ -153,15 +153,14 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) u16 tail = io_sq->tail; netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "Write submission queue doorbell for queue: %d tail: %d\n", - io_sq->qid, tail); + "Write submission queue doorbell for queue: %d tail: %d\n", io_sq->qid, tail); writel(tail, io_sq->db_addr); if (is_llq_max_tx_burst_exists(io_sq)) { netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, - "Reset available entries in tx burst for queue %d to %d\n", - io_sq->qid, max_entries_in_tx_burst); + "Reset available entries in tx burst for queue %d to %d\n", io_sq->qid, + max_entries_in_tx_burst); io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst; } @@ -244,8 +243,8 @@ static inline int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, *req_id = READ_ONCE(cdesc->req_id); if (unlikely(*req_id >= io_cq->q_depth)) { - 
netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device, - "Invalid req id %d\n", cdesc->req_id); + netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device, "Invalid req id %d\n", + cdesc->req_id); return -EINVAL; } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 43c099141e..4d036b1ea6 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -103,7 +103,7 @@ static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) return; - adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD; + ena_reset_device(adapter, ENA_REGS_RESET_OS_NETDEV_WD); ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp); netif_err(adapter, tx_err, dev, "Transmit time out\n"); @@ -164,13 +164,9 @@ static int ena_xmit_common(struct net_device *dev, if (unlikely(rc)) { netif_err(adapter, tx_queued, dev, "Failed to prepare tx bufs\n"); - ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, - &ring->syncp); - if (rc != -ENOMEM) { - adapter->reset_reason = - ENA_REGS_RESET_DRIVER_INVALID_STATE; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - } + ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp); + if (rc != -ENOMEM) + ena_reset_device(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE); return rc; } @@ -994,8 +990,7 @@ static struct page *ena_alloc_map_page(struct ena_ring *rx_ring, */ page = dev_alloc_page(); if (!page) { - ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, - &rx_ring->syncp); + ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp); return ERR_PTR(-ENOSPC); } @@ -1024,7 +1019,7 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, int tailroom; /* restore page offset value in case it has been changed by device */ - rx_info->page_offset = headroom; + rx_info->buf_offset = headroom; /* if previous allocated page is not used */ if 
(unlikely(rx_info->page)) @@ -1041,6 +1036,8 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); rx_info->page = page; + rx_info->dma_addr = dma; + rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma + headroom; ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom; @@ -1048,14 +1045,12 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, return 0; } -static void ena_unmap_rx_buff(struct ena_ring *rx_ring, - struct ena_rx_buffer *rx_info) +static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, + unsigned long attrs) { - struct ena_com_buf *ena_buf = &rx_info->ena_buf; - - dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom, - ENA_PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, DMA_BIDIRECTIONAL, + attrs); } static void ena_free_rx_page(struct ena_ring *rx_ring, @@ -1069,7 +1064,7 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - ena_unmap_rx_buff(rx_ring, rx_info); + ena_unmap_rx_buff_attrs(rx_ring, rx_info, 0); __free_page(page); rx_info->page = NULL; @@ -1205,8 +1200,11 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring, static void ena_free_tx_bufs(struct ena_ring *tx_ring) { bool print_once = true; + bool is_xdp_ring; u32 i; + is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid); + for (i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; @@ -1226,10 +1224,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) ena_unmap_tx_buff(tx_ring, tx_info); - dev_kfree_skb_any(tx_info->skb); + if (is_xdp_ring) + xdp_return_frame(tx_info->xdpf); + else + dev_kfree_skb_any(tx_info->skb); } - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->qid)); + + if (!is_xdp_ring) + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); } static void 
ena_free_all_tx_bufs(struct ena_adapter *adapter) @@ -1289,10 +1292,8 @@ static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, req_id); ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp); + ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); - /* Trigger device reset */ - ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; - set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags); return -EFAULT; } @@ -1340,8 +1341,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) &req_id); if (rc) { if (unlikely(rc == -EINVAL)) - handle_invalid_req_id(tx_ring, req_id, NULL, - false); + handle_invalid_req_id(tx_ring, req_id, NULL, false); break; } @@ -1409,15 +1409,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) return tx_pkts; } -static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) +static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag, u16 len) { struct sk_buff *skb; if (!first_frag) - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_copybreak); + skb = napi_alloc_skb(rx_ring->napi, len); else - skb = build_skb(first_frag, ENA_PAGE_SIZE); + skb = napi_build_skb(first_frag, len); if (unlikely(!skb)) { ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1, @@ -1426,24 +1425,47 @@ static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, "Failed to allocate skb. first_frag %s\n", first_frag ? "provided" : "not provided"); - return NULL; } return skb; } +static bool ena_try_rx_buf_page_reuse(struct ena_rx_buffer *rx_info, u16 buf_len, + u16 len, int pkt_offset) +{ + struct ena_com_buf *ena_buf = &rx_info->ena_buf; + + /* More than ENA_MIN_RX_BUF_SIZE left in the reused buffer + * for data + headroom + tailroom. 
+ */ + if (SKB_DATA_ALIGN(len + pkt_offset) + ENA_MIN_RX_BUF_SIZE <= ena_buf->len) { + page_ref_inc(rx_info->page); + rx_info->page_offset += buf_len; + ena_buf->paddr += buf_len; + ena_buf->len -= buf_len; + return true; + } + + return false; +} + static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, u32 descs, u16 *next_to_clean) { + int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + bool is_xdp_loaded = ena_xdp_present_ring(rx_ring); struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; + int page_offset, pkt_offset; + dma_addr_t pre_reuse_paddr; u16 len, req_id, buf = 0; + bool reuse_rx_buf_page; struct sk_buff *skb; - void *page_addr; - u32 page_offset; - void *data_addr; + void *buf_addr; + int buf_offset; + u16 buf_len; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; @@ -1455,10 +1477,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, netif_err(adapter, rx_err, rx_ring->netdev, "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id); ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp); - adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; - /* Make sure reset reason is set before triggering the reset */ - smp_mb__before_atomic(); - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID); return NULL; } @@ -1466,34 +1485,25 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, "rx_info %p page %p\n", rx_info, rx_info->page); - /* save virt address of first buffer */ - page_addr = page_address(rx_info->page); + buf_offset = rx_info->buf_offset; + pkt_offset = buf_offset - rx_ring->rx_headroom; page_offset = rx_info->page_offset; - data_addr = page_addr + page_offset; - - prefetch(data_addr); + buf_addr = page_address(rx_info->page) + page_offset; if (len <= rx_ring->rx_copybreak) { - skb = ena_alloc_skb(rx_ring, NULL); + skb = ena_alloc_skb(rx_ring, NULL, len); if (unlikely(!skb)) return NULL; - 
netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, - "RX allocated small packet. len %d. data_len %d\n", - skb->len, skb->data_len); - - /* sync this buffer for CPU use */ - dma_sync_single_for_cpu(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr), - len, - DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, data_addr, len); + skb_copy_to_linear_data(skb, buf_addr + buf_offset, len); dma_sync_single_for_device(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr), + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, len, DMA_FROM_DEVICE); skb_put(skb, len); + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "RX allocated small packet. len %d.\n", skb->len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, @@ -1501,14 +1511,21 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, return skb; } - ena_unmap_rx_buff(rx_ring, rx_info); + buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); + + /* If XDP isn't loaded try to reuse part of the RX buffer */ + reuse_rx_buf_page = !is_xdp_loaded && + ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); + + if (!reuse_rx_buf_page) + ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); - skb = ena_alloc_skb(rx_ring, page_addr); + skb = ena_alloc_skb(rx_ring, buf_addr, buf_len); if (unlikely(!skb)) return NULL; /* Populate skb's linear part */ - skb_reserve(skb, page_offset); + skb_reserve(skb, buf_offset); skb_put(skb, len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); @@ -1517,7 +1534,8 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, "RX skb updated. len %d. 
data_len %d\n", skb->len, skb->data_len); - rx_info->page = NULL; + if (!reuse_rx_buf_page) + rx_info->page = NULL; rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = @@ -1532,10 +1550,27 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info = &rx_ring->rx_buffer_info[req_id]; - ena_unmap_rx_buff(rx_ring, rx_info); + /* rx_info->buf_offset includes rx_ring->rx_headroom */ + buf_offset = rx_info->buf_offset; + pkt_offset = buf_offset - rx_ring->rx_headroom; + buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); + page_offset = rx_info->page_offset; + + pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); + + reuse_rx_buf_page = !is_xdp_loaded && + ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); + + dma_sync_single_for_cpu(rx_ring->dev, + pre_reuse_paddr + pkt_offset, + len, + DMA_FROM_DEVICE); + + if (!reuse_rx_buf_page) + ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, ENA_PAGE_SIZE); + page_offset + buf_offset, len, buf_len); } while (1); @@ -1641,14 +1676,14 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; xdp_prepare_buff(xdp, page_address(rx_info->page), - rx_info->page_offset, + rx_info->buf_offset, rx_ring->ena_bufs[0].len, false); ret = ena_xdp_execute(rx_ring, xdp); /* The xdp program might expand the headers */ if (ret == ENA_XDP_PASS) { - rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_info->buf_offset = xdp->data - xdp->data_hard_start; rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; } @@ -1677,6 +1712,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, int xdp_flags = 0; int total_len = 0; int xdp_verdict; + u8 pkt_offset; int rc = 0; int i; @@ -1703,13 +1739,19 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, 
/* First descriptor might have an offset set by the device */ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - rx_info->page_offset += ena_rx_ctx.pkt_offset; + pkt_offset = ena_rx_ctx.pkt_offset; + rx_info->buf_offset += pkt_offset; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + dma_sync_single_for_cpu(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, + rx_ring->ena_bufs[0].len, + DMA_FROM_DEVICE); + if (ena_xdp_present_ring(rx_ring)) xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs); @@ -1733,8 +1775,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, * from RX side. */ if (xdp_verdict & ENA_XDP_FORWARDED) { - ena_unmap_rx_buff(rx_ring, - &rx_ring->rx_buffer_info[req_id]); + ena_unmap_rx_buff_attrs(rx_ring, + &rx_ring->rx_buffer_info[req_id], + DMA_ATTR_SKIP_CPU_SYNC); rx_ring->rx_buffer_info[req_id].page = NULL; } } @@ -1796,17 +1839,13 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, adapter = netdev_priv(rx_ring->netdev); if (rc == -ENOSPC) { - ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, - &rx_ring->syncp); - adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; + ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp); + ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS); } else { ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp); - adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; + ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID); } - - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - return 0; } @@ -2347,8 +2386,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) if (!ena_dev->rss.tbl_log_size) { rc = ena_rss_init_default(adapter); if (rc && (rc != -EOPNOTSUPP)) { - 
netif_err(adapter, ifup, adapter->netdev, - "Failed to init RSS rc: %d\n", rc); + netif_err(adapter, ifup, adapter->netdev, "Failed to init RSS rc: %d\n", rc); return rc; } } @@ -3221,7 +3259,8 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK | ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK | - ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; + ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK | + ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK; rc = ena_com_set_host_attributes(ena_dev); if (rc) { @@ -3264,8 +3303,7 @@ static void ena_config_debug_area(struct ena_adapter *adapter) rc = ena_com_set_host_attributes(adapter->ena_dev); if (rc) { if (rc == -EOPNOTSUPP) - netif_warn(adapter, drv, adapter->netdev, - "Cannot set host attributes\n"); + netif_warn(adapter, drv, adapter->netdev, "Cannot set host attributes\n"); else netif_err(adapter, drv, adapter->netdev, "Cannot set host attributes\n"); @@ -3732,9 +3770,8 @@ static int check_for_rx_interrupt_queue(struct ena_adapter *adapter, netif_err(adapter, rx_err, adapter->netdev, "Potential MSIX issue on Rx side Queue = %d. Reset the device\n", rx_ring->qid); - adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT; - smp_mb__before_atomic(); - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + + ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT); return -EIO; } @@ -3771,9 +3808,7 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, netif_err(adapter, tx_err, adapter->netdev, "Potential MSIX issue on Tx side Queue = %d. 
Reset the device\n", tx_ring->qid); - adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT; - smp_mb__before_atomic(); - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT); return -EIO; } @@ -3799,9 +3834,7 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", missed_tx, adapter->missing_tx_completion_threshold); - adapter->reset_reason = - ENA_REGS_RESET_MISS_TX_CMPL; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + ena_reset_device(adapter, ENA_REGS_RESET_MISS_TX_CMPL); rc = -EIO; } @@ -3815,10 +3848,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter) { struct ena_ring *tx_ring; struct ena_ring *rx_ring; - int i, budget, rc; + int qid, budget, rc; int io_queue_count; io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; + /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -3831,27 +3865,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) return; - budget = ENA_MONITORED_TX_QUEUES; + budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES); - for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { - tx_ring = &adapter->tx_ring[i]; - rx_ring = &adapter->rx_ring[i]; + qid = adapter->last_monitored_tx_qid; + + while (budget) { + qid = (qid + 1) % io_queue_count; + + tx_ring = &adapter->tx_ring[qid]; + rx_ring = &adapter->rx_ring[qid]; rc = check_missing_comp_in_tx_queue(adapter, tx_ring); if (unlikely(rc)) return; - rc = !ENA_IS_XDP_INDEX(adapter, i) ? + rc = !ENA_IS_XDP_INDEX(adapter, qid) ? 
check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; budget--; - if (!budget) - break; } - adapter->last_monitored_tx_qid = i % io_queue_count; + adapter->last_monitored_tx_qid = qid; } /* trigger napi schedule after 2 consecutive detections */ @@ -3922,8 +3958,7 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) "Keep alive watchdog timeout.\n"); ena_increase_stat(&adapter->dev_stats.wd_expired, 1, &adapter->syncp); - adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); } } @@ -3934,8 +3969,7 @@ static void check_for_admin_com_state(struct ena_adapter *adapter) "ENA admin queue is not in running state!\n"); ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1, &adapter->syncp); - adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO); } } @@ -4141,8 +4175,8 @@ static int ena_rss_init_default(struct ena_adapter *adapter) } } - rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, - ENA_HASH_KEY_SIZE, 0xFFFFFFFF); + rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, ENA_HASH_KEY_SIZE, + 0xFFFFFFFF); if (unlikely(rc && (rc != -EOPNOTSUPP))) { dev_err(dev, "Cannot fill hash function\n"); goto err_fill_indir; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index bf2a39c91c..de54815845 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -50,6 +50,8 @@ #define ENA_DEFAULT_RING_SIZE (1024) #define ENA_MIN_RING_SIZE (256) +#define ENA_MIN_RX_BUF_SIZE (2048) + #define ENA_MIN_NUM_IO_QUEUES (1) #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) @@ -186,7 +188,9 @@ struct ena_tx_buffer { struct ena_rx_buffer { struct sk_buff *skb; struct page *page; + dma_addr_t dma_addr; u32 
page_offset; + u32 buf_offset; struct ena_com_buf ena_buf; } ____cacheline_aligned; @@ -410,6 +414,15 @@ int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak); int ena_get_sset_count(struct net_device *netdev, int sset); +static inline void ena_reset_device(struct ena_adapter *adapter, + enum ena_regs_reset_reason_types reset_reason) +{ + adapter->reset_reason = reset_reason; + /* Make sure reset reason is set before triggering the reset */ + smp_mb__before_atomic(); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +} + enum ena_xdp_errors_t { ENA_XDP_ALLOWED = 0, ENA_XDP_CURRENT_MTU_TOO_LARGE, diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 485d32dda5..ce370ef641 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2029,12 +2029,14 @@ static int b44_set_pauseparam(struct net_device *dev, bp->flags |= B44_FLAG_TX_PAUSE; else bp->flags &= ~B44_FLAG_TX_PAUSE; - if (bp->flags & B44_FLAG_PAUSE_AUTO) { - b44_halt(bp); - b44_init_rings(bp); - b44_init_hw(bp, B44_FULL_RESET); - } else { - __b44_set_flow_ctrl(bp, bp->flags); + if (netif_running(dev)) { + if (bp->flags & B44_FLAG_PAUSE_AUTO) { + b44_halt(bp); + b44_init_rings(bp); + b44_init_hw(bp, B44_FULL_RESET); + } else { + __b44_set_flow_ctrl(bp, bp->flags); + } } spin_unlock_irq(&bp->lock); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index a2b736a9d2..4b8bb99b58 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -2424,14 +2424,18 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable) { u32 reg; + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); - 
if (reg & CMD_SW_RESET) + if (reg & CMD_SW_RESET) { + spin_unlock_bh(&priv->reg_lock); return; + } if (enable) reg |= mask; else reg &= ~mask; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); /* UniMAC stops on a packet boundary, wait for a full-size packet * to be processed @@ -2447,8 +2451,10 @@ static void reset_umac(struct bcmgenet_priv *priv) udelay(10); /* issue soft reset and disable MAC while updating its registers */ + spin_lock_bh(&priv->reg_lock); bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD); udelay(2); + spin_unlock_bh(&priv->reg_lock); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) @@ -3256,7 +3262,7 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv, } /* Returns a reusable dma control register value */ -static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) +static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx) { unsigned int i; u32 reg; @@ -3281,6 +3287,14 @@ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv) udelay(10); bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH); + if (flush_rx) { + reg = bcmgenet_rbuf_ctrl_get(priv); + bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0)); + udelay(10); + bcmgenet_rbuf_ctrl_set(priv, reg); + udelay(10); + } + return dma_ctrl; } @@ -3302,7 +3316,9 @@ static void bcmgenet_netif_start(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); /* Start the network engine */ + netif_addr_lock_bh(dev); bcmgenet_set_rx_mode(dev); + netif_addr_unlock_bh(dev); bcmgenet_enable_rx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); @@ -3344,8 +3360,8 @@ static int bcmgenet_open(struct net_device *dev) bcmgenet_set_hw_addr(priv, dev->dev_addr); - /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + /* Disable RX/TX DMA and flush TX and RX queues */ + dma_ctrl = bcmgenet_dma_disable(priv, true); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); @@ 
-3566,16 +3582,19 @@ static void bcmgenet_set_rx_mode(struct net_device *dev) * 3. The number of filters needed exceeds the number filters * supported by the hardware. */ + spin_lock(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if ((dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) || (nfilter > MAX_MDF_FILTER)) { reg |= CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); bcmgenet_umac_writel(priv, 0, UMAC_MDF_CTRL); return; } else { reg &= ~CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); } /* update MDF filter */ @@ -3969,6 +3988,7 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->reg_lock); spin_lock_init(&priv->lock); SET_NETDEV_DEV(dev, &pdev->dev); @@ -4201,7 +4221,7 @@ static int bcmgenet_resume(struct device *d) bcmgenet_hfb_create_rxnfc_filter(priv, rule); /* Disable RX/TX DMA and flush TX queues */ - dma_ctrl = bcmgenet_dma_disable(priv); + dma_ctrl = bcmgenet_dma_disable(priv, false); /* Reinitialize TDMA and RDMA and SW housekeeping */ ret = bcmgenet_init_dma(priv); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 0eeb304a42..34a3c448d4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -572,6 +572,8 @@ struct bcmgenet_rxnfc_rule { /* device context */ struct bcmgenet_priv { void __iomem *base; + /* reg_lock: lock to serialize access to shared registers */ + spinlock_t reg_lock; enum bcmgenet_version version; struct net_device *dev; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index f55d9d9c01..38d41028e9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -133,6 +133,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Can't suspend with 
WoL if MAC is still in reset */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if (reg & CMD_SW_RESET) reg &= ~CMD_SW_RESET; @@ -140,6 +141,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* disable RX */ reg &= ~CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); mdelay(10); if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { @@ -185,6 +187,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Enable CRC forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = 1; reg |= CMD_CRC_FWD; @@ -192,6 +195,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* Receiver must be enabled for WOL MP detection */ reg |= CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); reg = UMAC_IRQ_MPD_R; if (hfb_enable) @@ -238,7 +242,9 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, } /* Disable CRC Forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~CMD_CRC_FWD; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 51f6c94e91..8c743e67d9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -91,6 +91,7 @@ void bcmgenet_mii_setup(struct net_device *dev) reg |= RGMII_LINK; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~((CMD_SPEED_MASK << CMD_SPEED_SHIFT) | CMD_HD_EN | @@ -103,6 +104,7 @@ void bcmgenet_mii_setup(struct net_device *dev) reg |= CMD_TX_EN | CMD_RX_EN; } bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); priv->eee.eee_active = phy_init_eee(phydev, 0) >= 0; bcmgenet_eee_enable_set(dev, @@ -264,6 +266,7 @@ int 
bcmgenet_mii_config(struct net_device *dev, bool init) * block for the interface to work */ if (priv->ext_phy) { + mutex_lock(&phydev->lock); reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); reg &= ~ID_MODE_DIS; reg |= id_mode_dis; @@ -272,6 +275,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) else reg |= RGMII_MODE_EN; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + mutex_unlock(&phydev->lock); } if (init) diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 04ad0f2b96..777f0d7e48 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -312,7 +312,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); @@ -372,7 +372,7 @@ bnad_debugfs_write_regwr(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index fa5b596ff2..a074e9d442 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2682,12 +2682,12 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) lb->loopback = 1; q = &adap->sge.ethtxq[pi->first_qset]; - __netif_tx_lock(q->txq, smp_processor_id()); + __netif_tx_lock_bh(q->txq); reclaim_completed_tx(adap, &q->q, -1, true); credits = txq_avail(&q->q) - ndesc; if (unlikely(credits < 0)) { - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); return -ENOMEM; } @@ -2722,7 +2722,7 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) init_completion(&lb->completion); txq_advance(&q->q, ndesc); 
cxgb4_ring_tx_db(adap, &q->q, ndesc); - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); /* wait for the pkt to return */ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index d0a8f71069..52bc164a1c 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1117,18 +1117,30 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]); if (port[IFLA_PORT_PROFILE]) { + if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) { + memcpy(pp, &prev_pp, sizeof(*pp)); + return -EINVAL; + } pp->set |= ENIC_SET_NAME; memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]), PORT_PROFILE_MAX); } if (port[IFLA_PORT_INSTANCE_UUID]) { + if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) { + memcpy(pp, &prev_pp, sizeof(*pp)); + return -EINVAL; + } pp->set |= ENIC_SET_INSTANCE; memcpy(pp->instance_uuid, nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX); } if (port[IFLA_PORT_HOST_UUID]) { + if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) { + memcpy(pp, &prev_pp, sizeof(*pp)); + return -EINVAL; + } pp->set |= ENIC_SET_HOST; memcpy(pp->host_uuid, nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 675c6dda45..0c8c92ff77 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1108,10 +1108,13 @@ static void gmac_tx_irq_enable(struct net_device *netdev, { struct gemini_ethernet_port *port = netdev_priv(netdev); struct gemini_ethernet *geth = port->geth; + unsigned long flags; u32 val, mask; netdev_dbg(netdev, "%s device %d\n", __func__, netdev->dev_id); + spin_lock_irqsave(&geth->irq_lock, flags); + mask = GMAC0_IRQ0_TXQ0_INTS << (6 * netdev->dev_id + txq); if (en) @@ -1120,6 +1123,8 @@ static void 
gmac_tx_irq_enable(struct net_device *netdev, val = readl(geth->base + GLOBAL_INTERRUPT_ENABLE_0_REG); val = en ? val | mask : val & ~mask; writel(val, geth->base + GLOBAL_INTERRUPT_ENABLE_0_REG); + + spin_unlock_irqrestore(&geth->irq_lock, flags); } static void gmac_tx_irq(struct net_device *netdev, unsigned int txq_num) @@ -1426,15 +1431,19 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) union gmac_rxdesc_3 word3; struct page *page = NULL; unsigned int page_offs; + unsigned long flags; unsigned short r, w; union dma_rwptr rw; dma_addr_t mapping; int frag_nr = 0; + spin_lock_irqsave(&geth->irq_lock, flags); rw.bits32 = readl(ptr_reg); /* Reset interrupt as all packages until here are taken into account */ writel(DEFAULT_Q0_INT_BIT << netdev->dev_id, geth->base + GLOBAL_INTERRUPT_STATUS_1_REG); + spin_unlock_irqrestore(&geth->irq_lock, flags); + r = rw.bits.rptr; w = rw.bits.wptr; @@ -1737,10 +1746,9 @@ static irqreturn_t gmac_irq(int irq, void *data) gmac_update_hw_stats(netdev); if (val & (GMAC0_RX_OVERRUN_INT_BIT << (netdev->dev_id * 8))) { + spin_lock(&geth->irq_lock); writel(GMAC0_RXDERR_INT_BIT << (netdev->dev_id * 8), geth->base + GLOBAL_INTERRUPT_STATUS_4_REG); - - spin_lock(&geth->irq_lock); u64_stats_update_begin(&port->ir_stats_syncp); ++port->stats.rx_fifo_errors; u64_stats_update_end(&port->ir_stats_syncp); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 39875ccbaa..f02376555e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2132,8 +2132,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = 1; - phy_attached_info(phy_dev); return 0; @@ -2145,10 +2143,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); bool suppress_preamble 
= false; + struct phy_device *phydev; struct device_node *node; int err = -ENXIO; u32 mii_speed, holdtime; u32 bus_freq; + int addr; /* * The i.MX28 dual fec interfaces are not equal. @@ -2258,6 +2258,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) goto err_out_free_mdiobus; of_node_put(node); + /* find all the PHY devices on the bus and set mac_managed_pm to true */ + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + phydev = mdiobus_get_phy(fep->mii_bus, addr); + if (phydev) + phydev->mac_managed_pm = true; + } + mii_cnt++; /* save fec0 mii_bus */ @@ -3620,6 +3627,14 @@ static int fec_enet_init(struct net_device *ndev) return ret; } +static void fec_enet_deinit(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + netif_napi_del(&fep->napi); + fec_enet_free_queue(ndev); +} + #ifdef CONFIG_OF static int fec_reset_phy(struct platform_device *pdev) { @@ -4016,6 +4031,7 @@ fec_probe(struct platform_device *pdev) fec_enet_mii_remove(fep); failed_mii_init: failed_irq: + fec_enet_deinit(ndev); failed_init: fec_ptp_stop(pdev); failed_reset: @@ -4078,6 +4094,7 @@ fec_drv_remove(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + fec_enet_deinit(ndev); free_netdev(ndev); return 0; } diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index c5ae673005..780fbb3e1e 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -103,14 +103,13 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable) u64 ns; val = 0; - if (fep->pps_enable == enable) - return 0; - - fep->pps_channel = DEFAULT_PPS_CHANNEL; - fep->reload_period = PPS_OUPUT_RELOAD_PERIOD; - spin_lock_irqsave(&fep->tmreg_lock, flags); + if (fep->pps_enable == enable) { + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + return 0; + } + if (enable) { /* clear capture or output compare interrupt status if have. 
*/ @@ -441,6 +440,9 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp, int ret = 0; if (rq->type == PTP_CLK_REQ_PPS) { + fep->pps_channel = DEFAULT_PPS_CHANNEL; + fep->reload_period = PPS_OUPUT_RELOAD_PERIOD; + ret = fec_ptp_enable_pps(fep, on); return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile index 7aa2fac76c..cb3aaf5252 100644 --- a/drivers/net/ethernet/hisilicon/hns3/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/Makefile @@ -4,9 +4,9 @@ # ccflags-y += -I$(srctree)/$(src) - -obj-$(CONFIG_HNS3) += hns3pf/ -obj-$(CONFIG_HNS3) += hns3vf/ +ccflags-y += -I$(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3pf +ccflags-y += -I$(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3vf +ccflags-y += -I$(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common obj-$(CONFIG_HNS3) += hnae3.o @@ -14,3 +14,15 @@ obj-$(CONFIG_HNS3_ENET) += hns3.o hns3-objs = hns3_enet.o hns3_ethtool.o hns3_debugfs.o hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o + +obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o + +hclgevf-objs = hns3vf/hclgevf_main.o hns3vf/hclgevf_cmd.o hns3vf/hclgevf_mbx.o hns3vf/hclgevf_devlink.o \ + hns3_common/hclge_comm_cmd.o + +obj-$(CONFIG_HNS3_HCLGE) += hclge.o +hclge-objs = hns3pf/hclge_main.o hns3pf/hclge_cmd.o hns3pf/hclge_mdio.o hns3pf/hclge_tm.o \ + hns3pf/hclge_mbx.o hns3pf/hclge_err.o hns3pf/hclge_debugfs.o hns3pf/hclge_ptp.o hns3pf/hclge_devlink.o \ + hns3_common/hclge_comm_cmd.o + +hclge-$(CONFIG_HNS3_DCB) += hns3pf/hclge_dcb.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 277d6d657c..debbaa1822 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -46,6 +46,7 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_PUSH_PROMISC_INFO, /* (PF -> VF) push vf promisc info */ HCLGE_MBX_VF_UNINIT, /* (VF -> PF) vf is unintializing */ HCLGE_MBX_HANDLE_VF_TBL, /* (VF -> PF) store/clear hw table 
*/ + HCLGE_MBX_GET_RING_VECTOR_MAP, /* (VF -> PF) get ring-to-vector map */ HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */ HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */ @@ -80,6 +81,9 @@ enum hclge_mbx_tbl_cfg_subcode { #define HCLGE_MBX_MAX_RESP_DATA_SIZE 8U #define HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM 4 +#define HCLGE_RESET_SCHED_TIMEOUT (3 * HZ) +#define HCLGE_MBX_SCHED_TIMEOUT (HZ / 2) + struct hclge_ring_chain_param { u8 ring_type; u8 tqp_index; @@ -208,6 +212,17 @@ struct hclgevf_mbx_arq_ring { __le16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE]; }; +struct hclge_dev; + +#define HCLGE_MBX_OPCODE_MAX 256 +struct hclge_mbx_ops_param { + struct hclge_vport *vport; + struct hclge_mbx_vf_to_pf_cmd *req; + struct hclge_respond_to_vf_msg *resp_msg; +}; + +typedef int (*hclge_mbx_ops_fn)(struct hclge_mbx_ops_param *param); + #define hclge_mbx_ring_ptr_move_crq(crq) \ (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num) #define hclge_mbx_tail_ptr_move_arq(arq) \ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index b51afb83d0..f362a2fac3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -341,6 +341,7 @@ struct hnae3_dev_specs { u8 max_non_tso_bd_num; /* max BD number of one non-TSO packet */ u16 max_frm_size; u16 max_qset_num; + u16 umv_size; }; struct hnae3_client_ops { @@ -828,7 +829,7 @@ struct hnae3_handle { struct hnae3_roce_private_info rinfo; }; - u32 numa_node_mask; /* for multi-chip support */ + nodemask_t numa_node_mask; /* for multi-chip support */ enum hnae3_port_base_vlan_state port_base_vlan_state; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c new file mode 100644 index 0000000000..89e999248b --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c @@ 
-0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (c) 2021-2021 Hisilicon Limited. + +#include "hnae3.h" +#include "hclge_comm_cmd.h" + +static bool hclge_is_elem_in_array(const u16 *spec_opcode, u32 size, u16 opcode) +{ + u32 i; + + for (i = 0; i < size; i++) { + if (spec_opcode[i] == opcode) + return true; + } + + return false; +} + +static const u16 pf_spec_opcode[] = { HCLGE_COMM_OPC_STATS_64_BIT, + HCLGE_COMM_OPC_STATS_32_BIT, + HCLGE_COMM_OPC_STATS_MAC, + HCLGE_COMM_OPC_STATS_MAC_ALL, + HCLGE_COMM_OPC_QUERY_32_BIT_REG, + HCLGE_COMM_OPC_QUERY_64_BIT_REG, + HCLGE_COMM_QUERY_CLEAR_MPF_RAS_INT, + HCLGE_COMM_QUERY_CLEAR_PF_RAS_INT, + HCLGE_COMM_QUERY_CLEAR_ALL_MPF_MSIX_INT, + HCLGE_COMM_QUERY_CLEAR_ALL_PF_MSIX_INT, + HCLGE_COMM_QUERY_ALL_ERR_INFO }; + +static const u16 vf_spec_opcode[] = { HCLGE_COMM_OPC_STATS_64_BIT, + HCLGE_COMM_OPC_STATS_32_BIT, + HCLGE_COMM_OPC_STATS_MAC }; + +static bool hclge_comm_is_special_opcode(u16 opcode, bool is_pf) +{ + /* these commands have several descriptors, + * and use the first one to save opcode and return value + */ + const u16 *spec_opcode = is_pf ? pf_spec_opcode : vf_spec_opcode; + u32 size = is_pf ? 
ARRAY_SIZE(pf_spec_opcode) : + ARRAY_SIZE(vf_spec_opcode); + + return hclge_is_elem_in_array(spec_opcode, size, opcode); +} + +static int hclge_comm_ring_space(struct hclge_comm_cmq_ring *ring) +{ + int ntc = ring->next_to_clean; + int ntu = ring->next_to_use; + int used = (ntu - ntc + ring->desc_num) % ring->desc_num; + + return ring->desc_num - used - 1; +} + +static void hclge_comm_cmd_copy_desc(struct hclge_comm_hw *hw, + struct hclge_desc *desc, int num) +{ + struct hclge_desc *desc_to_use; + int handle = 0; + + while (handle < num) { + desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; + *desc_to_use = desc[handle]; + (hw->cmq.csq.next_to_use)++; + if (hw->cmq.csq.next_to_use >= hw->cmq.csq.desc_num) + hw->cmq.csq.next_to_use = 0; + handle++; + } +} + +static int hclge_comm_is_valid_csq_clean_head(struct hclge_comm_cmq_ring *ring, + int head) +{ + int ntc = ring->next_to_clean; + int ntu = ring->next_to_use; + + if (ntu > ntc) + return head >= ntc && head <= ntu; + + return head >= ntc || head <= ntu; +} + +static int hclge_comm_cmd_csq_clean(struct hclge_comm_hw *hw) +{ + struct hclge_comm_cmq_ring *csq = &hw->cmq.csq; + int clean; + u32 head; + + head = hclge_comm_read_dev(hw, HCLGE_COMM_NIC_CSQ_HEAD_REG); + rmb(); /* Make sure head is ready before touch any data */ + + if (!hclge_comm_is_valid_csq_clean_head(csq, head)) { + dev_warn(&hw->cmq.csq.pdev->dev, "wrong cmd head (%u, %d-%d)\n", + head, csq->next_to_use, csq->next_to_clean); + dev_warn(&hw->cmq.csq.pdev->dev, + "Disabling any further commands to IMP firmware\n"); + set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hw->comm_state); + dev_warn(&hw->cmq.csq.pdev->dev, + "IMP firmware watchdog reset soon expected!\n"); + return -EIO; + } + + clean = (head - csq->next_to_clean + csq->desc_num) % csq->desc_num; + csq->next_to_clean = head; + return clean; +} + +static int hclge_comm_cmd_csq_done(struct hclge_comm_hw *hw) +{ + u32 head = hclge_comm_read_dev(hw, HCLGE_COMM_NIC_CSQ_HEAD_REG); + return head == 
hw->cmq.csq.next_to_use; +} + +static void hclge_comm_wait_for_resp(struct hclge_comm_hw *hw, + bool *is_completed) +{ + u32 timeout = 0; + + do { + if (hclge_comm_cmd_csq_done(hw)) { + *is_completed = true; + break; + } + udelay(1); + timeout++; + } while (timeout < hw->cmq.tx_timeout); +} + +static int hclge_comm_cmd_convert_err_code(u16 desc_ret) +{ + struct hclge_comm_errcode hclge_comm_cmd_errcode[] = { + { HCLGE_COMM_CMD_EXEC_SUCCESS, 0 }, + { HCLGE_COMM_CMD_NO_AUTH, -EPERM }, + { HCLGE_COMM_CMD_NOT_SUPPORTED, -EOPNOTSUPP }, + { HCLGE_COMM_CMD_QUEUE_FULL, -EXFULL }, + { HCLGE_COMM_CMD_NEXT_ERR, -ENOSR }, + { HCLGE_COMM_CMD_UNEXE_ERR, -ENOTBLK }, + { HCLGE_COMM_CMD_PARA_ERR, -EINVAL }, + { HCLGE_COMM_CMD_RESULT_ERR, -ERANGE }, + { HCLGE_COMM_CMD_TIMEOUT, -ETIME }, + { HCLGE_COMM_CMD_HILINK_ERR, -ENOLINK }, + { HCLGE_COMM_CMD_QUEUE_ILLEGAL, -ENXIO }, + { HCLGE_COMM_CMD_INVALID, -EBADR }, + }; + u32 errcode_count = ARRAY_SIZE(hclge_comm_cmd_errcode); + u32 i; + + for (i = 0; i < errcode_count; i++) + if (hclge_comm_cmd_errcode[i].imp_errcode == desc_ret) + return hclge_comm_cmd_errcode[i].common_errno; + + return -EIO; +} + +static int hclge_comm_cmd_check_retval(struct hclge_comm_hw *hw, + struct hclge_desc *desc, int num, + int ntc, bool is_pf) +{ + u16 opcode, desc_ret; + int handle; + + opcode = le16_to_cpu(desc[0].opcode); + for (handle = 0; handle < num; handle++) { + desc[handle] = hw->cmq.csq.desc[ntc]; + ntc++; + if (ntc >= hw->cmq.csq.desc_num) + ntc = 0; + } + if (likely(!hclge_comm_is_special_opcode(opcode, is_pf))) + desc_ret = le16_to_cpu(desc[num - 1].retval); + else + desc_ret = le16_to_cpu(desc[0].retval); + + hw->cmq.last_status = desc_ret; + + return hclge_comm_cmd_convert_err_code(desc_ret); +} + +static int hclge_comm_cmd_check_result(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int num, int ntc, bool is_pf) +{ + bool is_completed = false; + int handle, ret; + + /* If the command is sync, wait for the firmware to write back, + * if 
multi descriptors to be sent, use the first one to check + */ + if (HCLGE_COMM_SEND_SYNC(le16_to_cpu(desc->flag))) + hclge_comm_wait_for_resp(hw, &is_completed); + + if (!is_completed) + ret = -EBADE; + else + ret = hclge_comm_cmd_check_retval(hw, desc, num, ntc, is_pf); + + /* Clean the command send queue */ + handle = hclge_comm_cmd_csq_clean(hw); + if (handle < 0) + ret = handle; + else if (handle != num) + dev_warn(&hw->cmq.csq.pdev->dev, + "cleaned %d, need to clean %d\n", handle, num); + return ret; +} + +/** + * hclge_comm_cmd_send - send command to command queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor for describing the command + * @num : the number of descriptors to be sent + * @is_pf: bool to judge pf/vf module + * + * This is the main send command for command queue, it + * sends the queue, cleans the queue, etc + **/ +int hclge_comm_cmd_send(struct hclge_comm_hw *hw, struct hclge_desc *desc, + int num, bool is_pf) +{ + struct hclge_comm_cmq_ring *csq = &hw->cmq.csq; + int ret; + int ntc; + + spin_lock_bh(&hw->cmq.csq.lock); + + if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hw->comm_state)) { + spin_unlock_bh(&hw->cmq.csq.lock); + return -EBUSY; + } + + if (num > hclge_comm_ring_space(&hw->cmq.csq)) { + /* If CMDQ ring is full, SW HEAD and HW HEAD may be different, + * need update the SW HEAD pointer csq->next_to_clean + */ + csq->next_to_clean = + hclge_comm_read_dev(hw, HCLGE_COMM_NIC_CSQ_HEAD_REG); + spin_unlock_bh(&hw->cmq.csq.lock); + return -EBUSY; + } + + /** + * Record the location of desc in the ring for this time + * which will be use for hardware to write back + */ + ntc = hw->cmq.csq.next_to_use; + + hclge_comm_cmd_copy_desc(hw, desc, num); + + /* Write to hardware */ + hclge_comm_write_dev(hw, HCLGE_COMM_NIC_CSQ_TAIL_REG, + hw->cmq.csq.next_to_use); + + ret = hclge_comm_cmd_check_result(hw, desc, num, ntc, is_pf); + + spin_unlock_bh(&hw->cmq.csq.lock); + + return ret; +} diff --git 
a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h new file mode 100644 index 0000000000..5164c666ca --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// Copyright (c) 2021-2021 Hisilicon Limited. + +#ifndef __HCLGE_COMM_CMD_H +#define __HCLGE_COMM_CMD_H +#include + +#include "hnae3.h" + +#define HCLGE_COMM_CMD_FLAG_NO_INTR BIT(4) + +#define HCLGE_COMM_SEND_SYNC(flag) \ + ((flag) & HCLGE_COMM_CMD_FLAG_NO_INTR) + +#define HCLGE_COMM_NIC_CSQ_TAIL_REG 0x27010 +#define HCLGE_COMM_NIC_CSQ_HEAD_REG 0x27014 + +enum hclge_comm_cmd_return_status { + HCLGE_COMM_CMD_EXEC_SUCCESS = 0, + HCLGE_COMM_CMD_NO_AUTH = 1, + HCLGE_COMM_CMD_NOT_SUPPORTED = 2, + HCLGE_COMM_CMD_QUEUE_FULL = 3, + HCLGE_COMM_CMD_NEXT_ERR = 4, + HCLGE_COMM_CMD_UNEXE_ERR = 5, + HCLGE_COMM_CMD_PARA_ERR = 6, + HCLGE_COMM_CMD_RESULT_ERR = 7, + HCLGE_COMM_CMD_TIMEOUT = 8, + HCLGE_COMM_CMD_HILINK_ERR = 9, + HCLGE_COMM_CMD_QUEUE_ILLEGAL = 10, + HCLGE_COMM_CMD_INVALID = 11, +}; + +enum hclge_comm_special_cmd { + HCLGE_COMM_OPC_STATS_64_BIT = 0x0030, + HCLGE_COMM_OPC_STATS_32_BIT = 0x0031, + HCLGE_COMM_OPC_STATS_MAC = 0x0032, + HCLGE_COMM_OPC_STATS_MAC_ALL = 0x0034, + HCLGE_COMM_OPC_QUERY_32_BIT_REG = 0x0041, + HCLGE_COMM_OPC_QUERY_64_BIT_REG = 0x0042, + HCLGE_COMM_QUERY_CLEAR_MPF_RAS_INT = 0x1511, + HCLGE_COMM_QUERY_CLEAR_PF_RAS_INT = 0x1512, + HCLGE_COMM_QUERY_CLEAR_ALL_MPF_MSIX_INT = 0x1514, + HCLGE_COMM_QUERY_CLEAR_ALL_PF_MSIX_INT = 0x1515, + HCLGE_COMM_QUERY_ALL_ERR_INFO = 0x1517, +}; + +enum hclge_comm_cmd_state { + HCLGE_COMM_STATE_CMD_DISABLE, +}; + +struct hclge_comm_errcode { + u32 imp_errcode; + int common_errno; +}; + +#define HCLGE_DESC_DATA_LEN 6 +struct hclge_desc { + __le16 opcode; + __le16 flag; + __le16 retval; + __le16 rsv; + __le32 data[HCLGE_DESC_DATA_LEN]; +}; + +struct hclge_comm_cmq_ring { + dma_addr_t desc_dma_addr; + 
struct hclge_desc *desc; + struct pci_dev *pdev; + u32 head; + u32 tail; + + u16 buf_size; + u16 desc_num; + int next_to_use; + int next_to_clean; + u8 ring_type; /* cmq ring type */ + spinlock_t lock; /* Command queue lock */ +}; + +enum hclge_comm_cmd_status { + HCLGE_COMM_STATUS_SUCCESS = 0, + HCLGE_COMM_ERR_CSQ_FULL = -1, + HCLGE_COMM_ERR_CSQ_TIMEOUT = -2, + HCLGE_COMM_ERR_CSQ_ERROR = -3, +}; + +struct hclge_comm_cmq { + struct hclge_comm_cmq_ring csq; + struct hclge_comm_cmq_ring crq; + u16 tx_timeout; + enum hclge_comm_cmd_status last_status; +}; + +struct hclge_comm_hw { + void __iomem *io_base; + void __iomem *mem_base; + struct hclge_comm_cmq cmq; + unsigned long comm_state; +}; + +static inline void hclge_comm_write_reg(void __iomem *base, u32 reg, u32 value) +{ + writel(value, base + reg); +} + +static inline u32 hclge_comm_read_reg(u8 __iomem *base, u32 reg) +{ + u8 __iomem *reg_addr = READ_ONCE(base); + + return readl(reg_addr + reg); +} + +#define hclge_comm_write_dev(a, reg, value) \ + hclge_comm_write_reg((a)->io_base, reg, value) +#define hclge_comm_read_dev(a, reg) \ + hclge_comm_read_reg((a)->io_base, reg) + +int hclge_comm_cmd_send(struct hclge_comm_hw *hw, struct hclge_desc *desc, + int num, bool is_pf); + +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 45f245b1d3..bd801e35d5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -924,6 +924,8 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos) dev_specs->max_tm_rate); *pos += scnprintf(buf + *pos, len - *pos, "MAX QSET number: %u\n", dev_specs->max_qset_num); + *pos += scnprintf(buf + *pos, len - *pos, "umv size: %u\n", + dev_specs->umv_size); } static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile deleted file mode 100644 index d1bf5c4c0a..0000000000 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0+ -# -# Makefile for the HISILICON network device drivers. -# - -ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 -ccflags-y += -I $(srctree)/$(src) - -obj-$(CONFIG_HNS3_HCLGE) += hclge.o -hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o hclge_ptp.o hclge_devlink.o - -hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 9c2eeaa822..59dd2283d2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -11,46 +11,24 @@ #include "hnae3.h" #include "hclge_main.h" -#define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev) - -static int hclge_ring_space(struct hclge_cmq_ring *ring) -{ - int ntu = ring->next_to_use; - int ntc = ring->next_to_clean; - int used = (ntu - ntc + ring->desc_num) % ring->desc_num; - - return ring->desc_num - used - 1; -} - -static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int head) -{ - int ntu = ring->next_to_use; - int ntc = ring->next_to_clean; - - if (ntu > ntc) - return head >= ntc && head <= ntu; - - return head >= ntc || head <= ntu; -} - -static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring) +static int hclge_alloc_cmd_desc(struct hclge_comm_cmq_ring *ring) { int size = ring->desc_num * sizeof(struct hclge_desc); - ring->desc = dma_alloc_coherent(cmq_ring_to_dev(ring), size, - &ring->desc_dma_addr, GFP_KERNEL); + ring->desc = dma_alloc_coherent(&ring->pdev->dev, + size, &ring->desc_dma_addr, GFP_KERNEL); if (!ring->desc) return -ENOMEM; return 0; } -static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring) +static void 
hclge_free_cmd_desc(struct hclge_comm_cmq_ring *ring) { int size = ring->desc_num * sizeof(struct hclge_desc); if (ring->desc) { - dma_free_coherent(cmq_ring_to_dev(ring), size, + dma_free_coherent(&ring->pdev->dev, size, ring->desc, ring->desc_dma_addr); ring->desc = NULL; } @@ -59,12 +37,13 @@ static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring) static int hclge_alloc_cmd_queue(struct hclge_dev *hdev, int ring_type) { struct hclge_hw *hw = &hdev->hw; - struct hclge_cmq_ring *ring = - (ring_type == HCLGE_TYPE_CSQ) ? &hw->cmq.csq : &hw->cmq.crq; + struct hclge_comm_cmq_ring *ring = + (ring_type == HCLGE_TYPE_CSQ) ? &hw->hw.cmq.csq : + &hw->hw.cmq.crq; int ret; ring->ring_type = ring_type; - ring->dev = hdev; + ring->pdev = hdev->pdev; ret = hclge_alloc_cmd_desc(ring); if (ret) { @@ -96,11 +75,10 @@ void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR); } -static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring) +static void hclge_cmd_config_regs(struct hclge_hw *hw, + struct hclge_comm_cmq_ring *ring) { dma_addr_t dma = ring->desc_dma_addr; - struct hclge_dev *hdev = ring->dev; - struct hclge_hw *hw = &hdev->hw; u32 reg_val; if (ring->ring_type == HCLGE_TYPE_CSQ) { @@ -128,176 +106,8 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring) static void hclge_cmd_init_regs(struct hclge_hw *hw) { - hclge_cmd_config_regs(&hw->cmq.csq); - hclge_cmd_config_regs(&hw->cmq.crq); -} - -static int hclge_cmd_csq_clean(struct hclge_hw *hw) -{ - struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw); - struct hclge_cmq_ring *csq = &hw->cmq.csq; - u32 head; - int clean; - - head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG); - rmb(); /* Make sure head is ready before touch any data */ - - if (!is_valid_csq_clean_head(csq, head)) { - dev_warn(&hdev->pdev->dev, "wrong cmd head (%u, %d-%d)\n", head, - csq->next_to_use, csq->next_to_clean); - dev_warn(&hdev->pdev->dev, - "Disabling any further commands to 
IMP firmware\n"); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - dev_warn(&hdev->pdev->dev, - "IMP firmware watchdog reset soon expected!\n"); - return -EIO; - } - - clean = (head - csq->next_to_clean + csq->desc_num) % csq->desc_num; - csq->next_to_clean = head; - return clean; -} - -static int hclge_cmd_csq_done(struct hclge_hw *hw) -{ - u32 head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG); - return head == hw->cmq.csq.next_to_use; -} - -static bool hclge_is_special_opcode(u16 opcode) -{ - /* these commands have several descriptors, - * and use the first one to save opcode and return value - */ - static const u16 spec_opcode[] = { - HCLGE_OPC_STATS_64_BIT, - HCLGE_OPC_STATS_32_BIT, - HCLGE_OPC_STATS_MAC, - HCLGE_OPC_STATS_MAC_ALL, - HCLGE_OPC_QUERY_32_BIT_REG, - HCLGE_OPC_QUERY_64_BIT_REG, - HCLGE_QUERY_CLEAR_MPF_RAS_INT, - HCLGE_QUERY_CLEAR_PF_RAS_INT, - HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, - HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, - HCLGE_QUERY_ALL_ERR_INFO - }; - int i; - - for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) { - if (spec_opcode[i] == opcode) - return true; - } - - return false; -} - -struct errcode { - u32 imp_errcode; - int common_errno; -}; - -static void hclge_cmd_copy_desc(struct hclge_hw *hw, struct hclge_desc *desc, - int num) -{ - struct hclge_desc *desc_to_use; - int handle = 0; - - while (handle < num) { - desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; - *desc_to_use = desc[handle]; - (hw->cmq.csq.next_to_use)++; - if (hw->cmq.csq.next_to_use >= hw->cmq.csq.desc_num) - hw->cmq.csq.next_to_use = 0; - handle++; - } -} - -static int hclge_cmd_convert_err_code(u16 desc_ret) -{ - struct errcode hclge_cmd_errcode[] = { - {HCLGE_CMD_EXEC_SUCCESS, 0}, - {HCLGE_CMD_NO_AUTH, -EPERM}, - {HCLGE_CMD_NOT_SUPPORTED, -EOPNOTSUPP}, - {HCLGE_CMD_QUEUE_FULL, -EXFULL}, - {HCLGE_CMD_NEXT_ERR, -ENOSR}, - {HCLGE_CMD_UNEXE_ERR, -ENOTBLK}, - {HCLGE_CMD_PARA_ERR, -EINVAL}, - {HCLGE_CMD_RESULT_ERR, -ERANGE}, - {HCLGE_CMD_TIMEOUT, -ETIME}, - 
{HCLGE_CMD_HILINK_ERR, -ENOLINK}, - {HCLGE_CMD_QUEUE_ILLEGAL, -ENXIO}, - {HCLGE_CMD_INVALID, -EBADR}, - }; - u32 errcode_count = ARRAY_SIZE(hclge_cmd_errcode); - u32 i; - - for (i = 0; i < errcode_count; i++) - if (hclge_cmd_errcode[i].imp_errcode == desc_ret) - return hclge_cmd_errcode[i].common_errno; - - return -EIO; -} - -static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, - int num, int ntc) -{ - u16 opcode, desc_ret; - int handle; - - opcode = le16_to_cpu(desc[0].opcode); - for (handle = 0; handle < num; handle++) { - desc[handle] = hw->cmq.csq.desc[ntc]; - ntc++; - if (ntc >= hw->cmq.csq.desc_num) - ntc = 0; - } - if (likely(!hclge_is_special_opcode(opcode))) - desc_ret = le16_to_cpu(desc[num - 1].retval); - else - desc_ret = le16_to_cpu(desc[0].retval); - - hw->cmq.last_status = desc_ret; - - return hclge_cmd_convert_err_code(desc_ret); -} - -static int hclge_cmd_check_result(struct hclge_hw *hw, struct hclge_desc *desc, - int num, int ntc) -{ - struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw); - bool is_completed = false; - u32 timeout = 0; - int handle, ret; - - /** - * If the command is sync, wait for the firmware to write back, - * if multi descriptors to be sent, use the first one to check - */ - if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) { - do { - if (hclge_cmd_csq_done(hw)) { - is_completed = true; - break; - } - udelay(1); - timeout++; - } while (timeout < hw->cmq.tx_timeout); - } - - if (!is_completed) - ret = -EBADE; - else - ret = hclge_cmd_check_retval(hw, desc, num, ntc); - - /* Clean the command send queue */ - handle = hclge_cmd_csq_clean(hw); - if (handle < 0) - ret = handle; - else if (handle != num) - dev_warn(&hdev->pdev->dev, - "cleaned %d, need to clean %d\n", handle, num); - return ret; + hclge_cmd_config_regs(hw, &hw->hw.cmq.csq); + hclge_cmd_config_regs(hw, &hw->hw.cmq.crq); } /** @@ -311,43 +121,7 @@ static int hclge_cmd_check_result(struct hclge_hw *hw, struct hclge_desc *desc, **/ int 
hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) { - struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw); - struct hclge_cmq_ring *csq = &hw->cmq.csq; - int ret; - int ntc; - - spin_lock_bh(&hw->cmq.csq.lock); - - if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { - spin_unlock_bh(&hw->cmq.csq.lock); - return -EBUSY; - } - - if (num > hclge_ring_space(&hw->cmq.csq)) { - /* If CMDQ ring is full, SW HEAD and HW HEAD may be different, - * need update the SW HEAD pointer csq->next_to_clean - */ - csq->next_to_clean = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG); - spin_unlock_bh(&hw->cmq.csq.lock); - return -EBUSY; - } - - /** - * Record the location of desc in the ring for this time - * which will be use for hardware to write back - */ - ntc = hw->cmq.csq.next_to_use; - - hclge_cmd_copy_desc(hw, desc, num); - - /* Write to hardware */ - hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, hw->cmq.csq.next_to_use); - - ret = hclge_cmd_check_result(hw, desc, num, ntc); - - spin_unlock_bh(&hw->cmq.csq.lock); - - return ret; + return hclge_comm_cmd_send(&hw->hw, desc, num, true); } static void hclge_set_default_capability(struct hclge_dev *hdev) @@ -401,7 +175,7 @@ static __le32 hclge_build_api_caps(void) return cpu_to_le32(api_caps); } -static enum hclge_cmd_status +static enum hclge_comm_cmd_status hclge_cmd_query_version_and_capability(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); @@ -433,18 +207,22 @@ hclge_cmd_query_version_and_capability(struct hclge_dev *hdev) int hclge_cmd_queue_init(struct hclge_dev *hdev) { + struct hclge_comm_cmq *cmdq = &hdev->hw.hw.cmq; int ret; /* Setup the lock for command queue */ - spin_lock_init(&hdev->hw.cmq.csq.lock); - spin_lock_init(&hdev->hw.cmq.crq.lock); + spin_lock_init(&cmdq->csq.lock); + spin_lock_init(&cmdq->crq.lock); + + cmdq->csq.pdev = hdev->pdev; + cmdq->crq.pdev = hdev->pdev; /* Setup the queue entries for use cmd queue */ - hdev->hw.cmq.csq.desc_num = 
HCLGE_NIC_CMQ_DESC_NUM; - hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; + cmdq->csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; + cmdq->crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; /* Setup Tx write back timeout */ - hdev->hw.cmq.tx_timeout = HCLGE_CMDQ_TX_TIMEOUT; + cmdq->tx_timeout = HCLGE_CMDQ_TX_TIMEOUT; /* Setup queue rings */ ret = hclge_alloc_cmd_queue(hdev, HCLGE_TYPE_CSQ); @@ -463,7 +241,7 @@ int hclge_cmd_queue_init(struct hclge_dev *hdev) return 0; err_csq: - hclge_free_cmd_desc(&hdev->hw.cmq.csq); + hclge_free_cmd_desc(&hdev->hw.hw.cmq.csq); return ret; } @@ -491,22 +269,23 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev, bool en) int hclge_cmd_init(struct hclge_dev *hdev) { + struct hclge_comm_cmq *cmdq = &hdev->hw.hw.cmq; int ret; - spin_lock_bh(&hdev->hw.cmq.csq.lock); - spin_lock(&hdev->hw.cmq.crq.lock); + spin_lock_bh(&cmdq->csq.lock); + spin_lock(&cmdq->crq.lock); - hdev->hw.cmq.csq.next_to_clean = 0; - hdev->hw.cmq.csq.next_to_use = 0; - hdev->hw.cmq.crq.next_to_clean = 0; - hdev->hw.cmq.crq.next_to_use = 0; + cmdq->csq.next_to_clean = 0; + cmdq->csq.next_to_use = 0; + cmdq->crq.next_to_clean = 0; + cmdq->crq.next_to_use = 0; hclge_cmd_init_regs(&hdev->hw); - spin_unlock(&hdev->hw.cmq.crq.lock); - spin_unlock_bh(&hdev->hw.cmq.csq.lock); + spin_unlock(&cmdq->crq.lock); + spin_unlock_bh(&cmdq->csq.lock); - clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + clear_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); /* Check if there is new reset pending, because the higher level * reset may happen when lower level reset is being processed. 
@@ -550,7 +329,7 @@ int hclge_cmd_init(struct hclge_dev *hdev) return 0; err_cmd_init: - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); return ret; } @@ -571,19 +350,23 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw) void hclge_cmd_uninit(struct hclge_dev *hdev) { + struct hclge_comm_cmq *cmdq = &hdev->hw.hw.cmq; + + cmdq->csq.pdev = hdev->pdev; + hclge_firmware_compat_config(hdev, false); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); /* wait to ensure that the firmware completes the possible left * over commands. */ msleep(HCLGE_CMDQ_CLEAR_WAIT_TIME); - spin_lock_bh(&hdev->hw.cmq.csq.lock); - spin_lock(&hdev->hw.cmq.crq.lock); + spin_lock_bh(&cmdq->csq.lock); + spin_lock(&cmdq->crq.lock); hclge_cmd_uninit_regs(&hdev->hw); - spin_unlock(&hdev->hw.cmq.crq.lock); - spin_unlock_bh(&hdev->hw.cmq.csq.lock); + spin_unlock(&cmdq->crq.lock); + spin_unlock_bh(&cmdq->csq.lock); - hclge_free_cmd_desc(&hdev->hw.cmq.csq); - hclge_free_cmd_desc(&hdev->hw.cmq.crq); + hclge_free_cmd_desc(&cmdq->csq); + hclge_free_cmd_desc(&cmdq->crq); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 33244472e0..303a7592bb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -7,74 +7,22 @@ #include #include #include "hnae3.h" +#include "hclge_comm_cmd.h" #define HCLGE_CMDQ_TX_TIMEOUT 30000 #define HCLGE_CMDQ_CLEAR_WAIT_TIME 200 -#define HCLGE_DESC_DATA_LEN 6 struct hclge_dev; -struct hclge_desc { - __le16 opcode; #define HCLGE_CMDQ_RX_INVLD_B 0 #define HCLGE_CMDQ_RX_OUTVLD_B 1 - __le16 flag; - __le16 retval; - __le16 rsv; - __le32 data[HCLGE_DESC_DATA_LEN]; -}; - -struct hclge_cmq_ring { - dma_addr_t desc_dma_addr; - struct hclge_desc *desc; - struct hclge_dev *dev; - u32 head; - u32 tail; - - 
u16 buf_size; - u16 desc_num; - int next_to_use; - int next_to_clean; - u8 ring_type; /* cmq ring type */ - spinlock_t lock; /* Command queue lock */ -}; - -enum hclge_cmd_return_status { - HCLGE_CMD_EXEC_SUCCESS = 0, - HCLGE_CMD_NO_AUTH = 1, - HCLGE_CMD_NOT_SUPPORTED = 2, - HCLGE_CMD_QUEUE_FULL = 3, - HCLGE_CMD_NEXT_ERR = 4, - HCLGE_CMD_UNEXE_ERR = 5, - HCLGE_CMD_PARA_ERR = 6, - HCLGE_CMD_RESULT_ERR = 7, - HCLGE_CMD_TIMEOUT = 8, - HCLGE_CMD_HILINK_ERR = 9, - HCLGE_CMD_QUEUE_ILLEGAL = 10, - HCLGE_CMD_INVALID = 11, -}; - -enum hclge_cmd_status { - HCLGE_STATUS_SUCCESS = 0, - HCLGE_ERR_CSQ_FULL = -1, - HCLGE_ERR_CSQ_TIMEOUT = -2, - HCLGE_ERR_CSQ_ERROR = -3, -}; - struct hclge_misc_vector { u8 __iomem *addr; int vector_irq; char name[HNAE3_INT_NAME_LEN]; }; -struct hclge_cmq { - struct hclge_cmq_ring csq; - struct hclge_cmq_ring crq; - u16 tx_timeout; - enum hclge_cmd_status last_status; -}; - #define HCLGE_CMD_FLAG_IN BIT(0) #define HCLGE_CMD_FLAG_OUT BIT(1) #define HCLGE_CMD_FLAG_NEXT BIT(2) @@ -1188,7 +1136,9 @@ struct hclge_dev_specs_1_cmd { __le16 max_frm_size; __le16 max_qset_num; __le16 max_int_gl; - u8 rsv1[18]; + u8 rsv0[2]; + __le16 umv_size; + u8 rsv1[14]; }; /* mac speed type defined in firmware command */ @@ -1241,25 +1191,6 @@ struct hclge_caps_bit_map { }; int hclge_cmd_init(struct hclge_dev *hdev); -static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) -{ - writel(value, base + reg); -} - -#define hclge_write_dev(a, reg, value) \ - hclge_write_reg((a)->io_base, reg, value) -#define hclge_read_dev(a, reg) \ - hclge_read_reg((a)->io_base, reg) - -static inline u32 hclge_read_reg(u8 __iomem *base, u32 reg) -{ - u8 __iomem *reg_addr = READ_ONCE(base); - - return readl(reg_addr + reg); -} - -#define HCLGE_SEND_SYNC(flag) \ - ((flag) & HCLGE_CMD_FLAG_NO_INTR) struct hclge_hw; int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num); @@ -1267,10 +1198,10 @@ void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, enum 
hclge_opcode_type opcode, bool is_read); void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read); -enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw, - struct hclge_desc *desc); -enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw, - struct hclge_desc *desc); +enum hclge_comm_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw, + struct hclge_desc *desc); +enum hclge_comm_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw, + struct hclge_desc *desc); void hclge_cmd_uninit(struct hclge_dev *hdev); int hclge_cmd_queue_init(struct hclge_dev *hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 598da1be22..d58048b056 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -24,6 +24,7 @@ #include "hclge_err.h" #include "hnae3.h" #include "hclge_devlink.h" +#include "hclge_comm_cmd.h" #define HCLGE_NAME "hclge" @@ -1343,8 +1344,6 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) cfg->umv_space = hnae3_get_field(__le32_to_cpu(req->param[1]), HCLGE_CFG_UMV_TBL_SPACE_M, HCLGE_CFG_UMV_TBL_SPACE_S); - if (!cfg->umv_space) - cfg->umv_space = HCLGE_DEFAULT_UMV_SPACE_PER_PF; cfg->pf_rss_size_max = hnae3_get_field(__le32_to_cpu(req->param[2]), HCLGE_CFG_PF_RSS_SIZE_M, @@ -1420,6 +1419,7 @@ static void hclge_set_default_dev_specs(struct hclge_dev *hdev) ae_dev->dev_specs.max_int_gl = HCLGE_DEF_MAX_INT_GL; ae_dev->dev_specs.max_frm_size = HCLGE_MAC_MAX_FRAME; ae_dev->dev_specs.max_qset_num = HCLGE_MAX_QSET_NUM; + ae_dev->dev_specs.umv_size = HCLGE_DEFAULT_UMV_SPACE_PER_PF; } static void hclge_parse_dev_specs(struct hclge_dev *hdev, @@ -1441,6 +1441,7 @@ static void hclge_parse_dev_specs(struct hclge_dev *hdev, ae_dev->dev_specs.max_qset_num = le16_to_cpu(req1->max_qset_num); ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl); 
ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size); + ae_dev->dev_specs.umv_size = le16_to_cpu(req1->umv_size); } static void hclge_check_dev_specs(struct hclge_dev *hdev) @@ -1461,6 +1462,8 @@ static void hclge_check_dev_specs(struct hclge_dev *hdev) dev_specs->max_int_gl = HCLGE_DEF_MAX_INT_GL; if (!dev_specs->max_frm_size) dev_specs->max_frm_size = HCLGE_MAC_MAX_FRAME; + if (!dev_specs->umv_size) + dev_specs->umv_size = HCLGE_DEFAULT_UMV_SPACE_PER_PF; } static int hclge_query_dev_specs(struct hclge_dev *hdev) @@ -1550,7 +1553,10 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->tm_info.num_pg = 1; hdev->tc_max = cfg.tc_num; hdev->tm_info.hw_pfc_map = 0; - hdev->wanted_umv_size = cfg.umv_space; + if (cfg.umv_space) + hdev->wanted_umv_size = cfg.umv_space; + else + hdev->wanted_umv_size = hdev->ae_dev->dev_specs.umv_size; hdev->tx_spare_buf_size = cfg.tx_spare_buf_size; hdev->gro_en = true; if (cfg.vlan_fliter_cap == HCLGE_VLAN_FLTR_CAN_MDF) @@ -1567,6 +1573,9 @@ static int hclge_configure(struct hclge_dev *hdev) cfg.default_speed, ret); return ret; } + hdev->hw.mac.req_speed = hdev->hw.mac.speed; + hdev->hw.mac.req_autoneg = AUTONEG_ENABLE; + hdev->hw.mac.req_duplex = DUPLEX_FULL; hclge_parse_link_mode(hdev, cfg.speed_ability); @@ -1669,11 +1678,11 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev) * HCLGE_TQP_MAX_SIZE_DEV_V2 */ if (i < HCLGE_TQP_MAX_SIZE_DEV_V2) - tqp->q.io_base = hdev->hw.io_base + + tqp->q.io_base = hdev->hw.hw.io_base + HCLGE_TQP_REG_OFFSET + i * HCLGE_TQP_REG_SIZE; else - tqp->q.io_base = hdev->hw.io_base + + tqp->q.io_base = hdev->hw.hw.io_base + HCLGE_TQP_REG_OFFSET + HCLGE_TQP_EXT_REG_OFFSET + (i - HCLGE_TQP_MAX_SIZE_DEV_V2) * @@ -1816,8 +1825,9 @@ static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps) nic->pdev = hdev->pdev; nic->ae_algo = &ae_algo; - nic->numa_node_mask = hdev->numa_node_mask; - nic->kinfo.io_base = hdev->hw.io_base; + bitmap_copy(nic->numa_node_mask.bits, 
hdev->numa_node_mask.bits, + MAX_NUMNODES); + nic->kinfo.io_base = hdev->hw.hw.io_base; ret = hclge_knic_setup(vport, num_tqps, hdev->num_tx_desc, hdev->num_rx_desc); @@ -2503,12 +2513,13 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) roce->rinfo.base_vector = hdev->num_nic_msi; roce->rinfo.netdev = nic->kinfo.netdev; - roce->rinfo.roce_io_base = hdev->hw.io_base; - roce->rinfo.roce_mem_base = hdev->hw.mem_base; + roce->rinfo.roce_io_base = hdev->hw.hw.io_base; + roce->rinfo.roce_mem_base = hdev->hw.hw.mem_base; roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; - roce->numa_node_mask = nic->numa_node_mask; + bitmap_copy(roce->numa_node_mask.bits, nic->numa_node_mask.bits, + MAX_NUMNODES); return 0; } @@ -2847,16 +2858,20 @@ static int hclge_mac_init(struct hclge_dev *hdev) static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && - !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) + !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) { + hdev->last_mbx_scheduled = jiffies; mod_delayed_work(hclge_wq, &hdev->service_task, 0); + } } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state) && - !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) + !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) { + hdev->last_rst_scheduled = jiffies; mod_delayed_work(hclge_wq, &hdev->service_task, 0); + } } static void hclge_errhand_task_schedule(struct hclge_dev *hdev) @@ -3158,9 +3173,9 @@ hclge_set_phy_link_ksettings(struct hnae3_handle *handle, return ret; } - hdev->hw.mac.autoneg = cmd->base.autoneg; - hdev->hw.mac.speed = cmd->base.speed; - hdev->hw.mac.duplex = cmd->base.duplex; + hdev->hw.mac.req_autoneg = cmd->base.autoneg; + hdev->hw.mac.req_speed = cmd->base.speed; + hdev->hw.mac.req_duplex = cmd->base.duplex; 
linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising); return 0; @@ -3193,9 +3208,9 @@ static int hclge_tp_port_init(struct hclge_dev *hdev) if (!hnae3_dev_phy_imp_supported(hdev)) return 0; - cmd.base.autoneg = hdev->hw.mac.autoneg; - cmd.base.speed = hdev->hw.mac.speed; - cmd.base.duplex = hdev->hw.mac.duplex; + cmd.base.autoneg = hdev->hw.mac.req_autoneg; + cmd.base.speed = hdev->hw.mac.req_speed; + cmd.base.duplex = hdev->hw.mac.req_duplex; linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising); return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd); @@ -3354,7 +3369,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & msix_src_reg) { dev_info(&hdev->pdev->dev, "IMP reset interrupt\n"); set_bit(HNAE3_IMP_RESET, &hdev->reset_pending); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); *clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B); hdev->rst_stats.imp_rst_cnt++; return HCLGE_VECTOR0_EVENT_RST; @@ -3362,7 +3377,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & msix_src_reg) { dev_info(&hdev->pdev->dev, "global reset interrupt\n"); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending); *clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B); hdev->rst_stats.global_rst_cnt++; @@ -3501,7 +3516,7 @@ static void hclge_get_misc_vector(struct hclge_dev *hdev) vector->vector_irq = pci_irq_vector(hdev->pdev, 0); - vector->addr = hdev->hw.io_base + HCLGE_MISC_VECTOR_REG_BASE; + vector->addr = hdev->hw.hw.io_base + HCLGE_MISC_VECTOR_REG_BASE; hdev->vector_status[0] = 0; hdev->num_msi_left -= 1; @@ -3685,10 +3700,17 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) static void hclge_mailbox_service_task(struct hclge_dev 
*hdev) { if (!test_and_clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state) || - test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) || + test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state) || test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) return; + if (time_is_before_jiffies(hdev->last_mbx_scheduled + + HCLGE_MBX_SCHED_TIMEOUT)) + dev_warn(&hdev->pdev->dev, + "mbx service task is scheduled after %ums on cpu%u!\n", + jiffies_to_msecs(jiffies - hdev->last_mbx_scheduled), + smp_processor_id()); + hclge_mbx_handler(hdev); clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); @@ -3925,7 +3947,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) * any mailbox handling or command to firmware is only valid * after hclge_cmd_init is called. */ - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); hdev->rst_stats.pf_rst_cnt++; break; case HNAE3_FLR_RESET: @@ -4338,6 +4360,13 @@ static void hclge_reset_service_task(struct hclge_dev *hdev) if (!test_and_clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) return; + if (time_is_before_jiffies(hdev->last_rst_scheduled + + HCLGE_RESET_SCHED_TIMEOUT)) + dev_warn(&hdev->pdev->dev, + "reset service task is scheduled after %ums on cpu%u!\n", + jiffies_to_msecs(jiffies - hdev->last_rst_scheduled), + smp_processor_id()); + down(&hdev->reset_sem); set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); @@ -4472,11 +4501,11 @@ static void hclge_get_vector_info(struct hclge_dev *hdev, u16 idx, /* need an extend offset to config vector >= 64 */ if (idx - 1 < HCLGE_PF_MAX_VECTOR_NUM_DEV_V2) - vector_info->io_addr = hdev->hw.io_base + + vector_info->io_addr = hdev->hw.hw.io_base + HCLGE_VECTOR_REG_BASE + (idx - 1) * HCLGE_VECTOR_REG_OFFSET; else - vector_info->io_addr = hdev->hw.io_base + + vector_info->io_addr = hdev->hw.hw.io_base + HCLGE_VECTOR_EXT_REG_BASE + (idx - 1) / HCLGE_PF_MAX_VECTOR_NUM_DEV_V2 * HCLGE_VECTOR_REG_OFFSET_H + @@ -5114,7 +5143,7 
@@ int hclge_bind_ring_with_vector(struct hclge_vport *vport, struct hclge_desc desc; struct hclge_ctrl_vector_chain_cmd *req = (struct hclge_ctrl_vector_chain_cmd *)desc.data; - enum hclge_cmd_status status; + enum hclge_comm_cmd_status status; enum hclge_opcode_type op; u16 tqp_type_and_id; int i; @@ -7640,7 +7669,7 @@ static bool hclge_get_cmdq_stat(struct hnae3_handle *handle) struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - return test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + return test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); } static bool hclge_ae_dev_resetting(struct hnae3_handle *handle) @@ -8127,8 +8156,7 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); - /* flush memory to make sure DOWN is seen by service task */ - smp_mb__before_atomic(); + smp_mb__after_atomic(); /* flush memory to make sure DOWN is seen by service task */ hclge_flush_link_update(hdev); } } @@ -8838,7 +8866,7 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN]; struct hclge_dev *hdev = vport->back; struct hclge_mac_vlan_tbl_entry_cmd req; - enum hclge_cmd_status status; + enum hclge_comm_cmd_status status; struct hclge_desc desc[3]; /* mac addr check */ @@ -10074,67 +10102,85 @@ static int hclge_set_vlan_protocol_type(struct hclge_dev *hdev) return status; } -static int hclge_init_vlan_config(struct hclge_dev *hdev) +static int hclge_init_vlan_filter(struct hclge_dev *hdev) { -#define HCLGE_DEF_VLAN_TYPE 0x8100 - - struct hnae3_handle *handle = &hdev->vport[0].nic; struct hclge_vport *vport; + bool enable = true; int ret; int i; - if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { - /* for revision 0x21, vf vlan filter is per function */ - for (i = 0; i < hdev->num_alloc_vport; i++) { - vport = &hdev->vport[i]; - ret = 
hclge_set_vlan_filter_ctrl(hdev, - HCLGE_FILTER_TYPE_VF, - HCLGE_FILTER_FE_EGRESS, - true, - vport->vport_id); - if (ret) - return ret; - vport->cur_vlan_fltr_en = true; - } + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) + return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, + HCLGE_FILTER_FE_EGRESS_V1_B, + true, 0); - ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, - HCLGE_FILTER_FE_INGRESS, true, - 0); - if (ret) - return ret; - } else { + /* for revision 0x21, vf vlan filter is per function */ + for (i = 0; i < hdev->num_alloc_vport; i++) { + vport = &hdev->vport[i]; ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, - HCLGE_FILTER_FE_EGRESS_V1_B, - true, 0); + HCLGE_FILTER_FE_EGRESS, true, + vport->vport_id); if (ret) return ret; + vport->cur_vlan_fltr_en = true; } - hdev->vlan_type_cfg.rx_in_fst_vlan_type = HCLGE_DEF_VLAN_TYPE; - hdev->vlan_type_cfg.rx_in_sec_vlan_type = HCLGE_DEF_VLAN_TYPE; - hdev->vlan_type_cfg.rx_ot_fst_vlan_type = HCLGE_DEF_VLAN_TYPE; - hdev->vlan_type_cfg.rx_ot_sec_vlan_type = HCLGE_DEF_VLAN_TYPE; - hdev->vlan_type_cfg.tx_ot_vlan_type = HCLGE_DEF_VLAN_TYPE; - hdev->vlan_type_cfg.tx_in_vlan_type = HCLGE_DEF_VLAN_TYPE; + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, hdev->ae_dev->caps) && + !test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, hdev->ae_dev->caps)) + enable = false; - ret = hclge_set_vlan_protocol_type(hdev); - if (ret) - return ret; + return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, + HCLGE_FILTER_FE_INGRESS, enable, 0); +} - for (i = 0; i < hdev->num_alloc_vport; i++) { - u16 vlan_tag; - u8 qos; +static int hclge_init_vlan_type(struct hclge_dev *hdev) +{ + hdev->vlan_type_cfg.rx_in_fst_vlan_type = ETH_P_8021Q; + hdev->vlan_type_cfg.rx_in_sec_vlan_type = ETH_P_8021Q; + hdev->vlan_type_cfg.rx_ot_fst_vlan_type = ETH_P_8021Q; + hdev->vlan_type_cfg.rx_ot_sec_vlan_type = ETH_P_8021Q; + hdev->vlan_type_cfg.tx_ot_vlan_type = ETH_P_8021Q; + hdev->vlan_type_cfg.tx_in_vlan_type 
= ETH_P_8021Q; + + return hclge_set_vlan_protocol_type(hdev); +} +static int hclge_init_vport_vlan_offload(struct hclge_dev *hdev) +{ + struct hclge_port_base_vlan_config *cfg; + struct hclge_vport *vport; + int ret; + int i; + + for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; - vlan_tag = vport->port_base_vlan_cfg.vlan_info.vlan_tag; - qos = vport->port_base_vlan_cfg.vlan_info.qos; + cfg = &vport->port_base_vlan_cfg; - ret = hclge_vlan_offload_cfg(vport, - vport->port_base_vlan_cfg.state, - vlan_tag, qos); + ret = hclge_vlan_offload_cfg(vport, cfg->state, + cfg->vlan_info.vlan_tag, + cfg->vlan_info.qos); if (ret) return ret; } + return 0; +} + +static int hclge_init_vlan_config(struct hclge_dev *hdev) +{ + struct hnae3_handle *handle = &hdev->vport[0].nic; + int ret; + + ret = hclge_init_vlan_filter(hdev); + if (ret) + return ret; + + ret = hclge_init_vlan_type(hdev); + if (ret) + return ret; + + ret = hclge_init_vport_vlan_offload(hdev); + if (ret) + return ret; return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false); } @@ -11424,10 +11470,11 @@ static int hclge_dev_mem_map(struct hclge_dev *hdev) if (!(pci_select_bars(pdev, IORESOURCE_MEM) & BIT(HCLGE_MEM_BAR))) return 0; - hw->mem_base = devm_ioremap_wc(&pdev->dev, - pci_resource_start(pdev, HCLGE_MEM_BAR), - pci_resource_len(pdev, HCLGE_MEM_BAR)); - if (!hw->mem_base) { + hw->hw.mem_base = + devm_ioremap_wc(&pdev->dev, + pci_resource_start(pdev, HCLGE_MEM_BAR), + pci_resource_len(pdev, HCLGE_MEM_BAR)); + if (!hw->hw.mem_base) { dev_err(&pdev->dev, "failed to map device memory\n"); return -EFAULT; } @@ -11466,8 +11513,8 @@ static int hclge_pci_init(struct hclge_dev *hdev) pci_set_master(pdev); hw = &hdev->hw; - hw->io_base = pcim_iomap(pdev, 2, 0); - if (!hw->io_base) { + hw->hw.io_base = pcim_iomap(pdev, 2, 0); + if (!hw->hw.io_base) { dev_err(&pdev->dev, "Can't map configuration register space\n"); ret = -ENOMEM; goto err_clr_master; @@ -11482,7 +11529,7 @@ static int 
hclge_pci_init(struct hclge_dev *hdev) return 0; err_unmap_io_base: - pcim_iounmap(pdev, hdev->hw.io_base); + pcim_iounmap(pdev, hdev->hw.hw.io_base); err_clr_master: pci_clear_master(pdev); pci_release_regions(pdev); @@ -11496,10 +11543,10 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) { struct pci_dev *pdev = hdev->pdev; - if (hdev->hw.mem_base) - devm_iounmap(&pdev->dev, hdev->hw.mem_base); + if (hdev->hw.hw.mem_base) + devm_iounmap(&pdev->dev, hdev->hw.hw.mem_base); - pcim_iounmap(pdev, hdev->hw.io_base); + pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_free_irq_vectors(pdev); pci_clear_master(pdev); pci_release_mem_regions(pdev); @@ -11560,7 +11607,7 @@ static void hclge_reset_prepare_general(struct hnae3_ae_dev *ae_dev, /* disable misc vector before reset done */ hclge_enable_vector(&hdev->misc_vector, false); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state); if (hdev->reset_type == HNAE3_FLR_RESET) hdev->rst_stats.flr_rst_cnt++; @@ -11851,7 +11898,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) err_devlink_uninit: hclge_devlink_uninit(hdev); err_pci_uninit: - pcim_iounmap(pdev, hdev->hw.io_base); + pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_clear_master(pdev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index a716027df0..1ef5b4c862 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -228,7 +228,6 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_MBX_HANDLING, HCLGE_STATE_ERR_SERVICE_SCHED, HCLGE_STATE_STATISTICS_UPDATING, - HCLGE_STATE_CMD_DISABLE, HCLGE_STATE_LINK_UPDATING, HCLGE_STATE_RST_FAIL, HCLGE_STATE_FD_TBL_CHANGED, @@ -275,10 +274,13 @@ struct hclge_mac { u8 media_type; /* port media type, e.g. 
fibre/copper/backplane */ u8 mac_addr[ETH_ALEN]; u8 autoneg; + u8 req_autoneg; u8 duplex; + u8 req_duplex; u8 support_autoneg; u8 speed_type; /* 0: sfp speed, 1: active speed */ u32 speed; + u32 req_speed; u32 max_speed; u32 speed_ability; /* speed ability supported by current media */ u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */ @@ -294,11 +296,9 @@ struct hclge_mac { }; struct hclge_hw { - void __iomem *io_base; - void __iomem *mem_base; + struct hclge_comm_hw hw; struct hclge_mac mac; int num_vec; - struct hclge_cmq cmq; }; /* TQP stats */ @@ -613,6 +613,11 @@ struct key_info { #define MAX_FD_FILTER_NUM 4096 #define HCLGE_ARFS_EXPIRE_INTERVAL 5UL +#define hclge_read_dev(a, reg) \ + hclge_comm_read_reg((a)->hw.io_base, reg) +#define hclge_write_dev(a, reg, value) \ + hclge_comm_write_reg((a)->hw.io_base, reg, value) + enum HCLGE_FD_ACTIVE_RULE_TYPE { HCLGE_FD_RULE_NONE, HCLGE_FD_ARFS_ACTIVE, @@ -858,7 +863,7 @@ struct hclge_dev { u16 fdir_pf_filter_count; /* Num of guaranteed filters for this PF */ u16 num_alloc_vport; /* Num vports this driver supports */ - u32 numa_node_mask; + nodemask_t numa_node_mask; u16 rx_buf_len; u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ @@ -925,6 +930,8 @@ struct hclge_dev { u16 hclge_fd_rule_num; unsigned long serv_processed_cnt; unsigned long last_serv_processed; + unsigned long last_rst_scheduled; + unsigned long last_mbx_scheduled; unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)]; enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type; u8 fd_en; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 5182051e54..77c432ab78 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -33,7 +33,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, { struct hclge_mbx_pf_to_vf_cmd *resp_pf_to_vf; struct hclge_dev *hdev = 
vport->back; - enum hclge_cmd_status status; + enum hclge_comm_cmd_status status; struct hclge_desc desc; u16 resp; @@ -92,7 +92,7 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, { struct hclge_mbx_pf_to_vf_cmd *resp_pf_to_vf; struct hclge_dev *hdev = vport->back; - enum hclge_cmd_status status; + enum hclge_comm_cmd_status status; struct hclge_desc desc; if (msg_len > HCLGE_MBX_MAX_MSG_SIZE) { @@ -250,6 +250,81 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, return ret; } +static int hclge_query_ring_vector_map(struct hclge_vport *vport, + struct hnae3_ring_chain_node *ring_chain, + struct hclge_desc *desc) +{ + struct hclge_ctrl_vector_chain_cmd *req = + (struct hclge_ctrl_vector_chain_cmd *)desc->data; + struct hclge_dev *hdev = vport->back; + u16 tqp_type_and_id; + int status; + + hclge_cmd_setup_basic_desc(desc, HCLGE_OPC_ADD_RING_TO_VECTOR, true); + + tqp_type_and_id = le16_to_cpu(req->tqp_type_and_id[0]); + hnae3_set_field(tqp_type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S, + hnae3_get_bit(ring_chain->flag, HNAE3_RING_TYPE_B)); + hnae3_set_field(tqp_type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S, + ring_chain->tqp_index); + req->tqp_type_and_id[0] = cpu_to_le16(tqp_type_and_id); + req->vfid = vport->vport_id; + + status = hclge_cmd_send(&hdev->hw, desc, 1); + if (status) + dev_err(&hdev->pdev->dev, + "Get VF ring vector map info fail, status is %d.\n", + status); + + return status; +} + +static int hclge_get_vf_ring_vector_map(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *req, + struct hclge_respond_to_vf_msg *resp) +{ +#define HCLGE_LIMIT_RING_NUM 1 +#define HCLGE_RING_TYPE_OFFSET 0 +#define HCLGE_TQP_INDEX_OFFSET 1 +#define HCLGE_INT_GL_INDEX_OFFSET 2 +#define HCLGE_VECTOR_ID_OFFSET 3 +#define HCLGE_RING_VECTOR_MAP_INFO_LEN 4 + struct hnae3_ring_chain_node ring_chain; + struct hclge_desc desc; + struct hclge_ctrl_vector_chain_cmd *data = + (struct hclge_ctrl_vector_chain_cmd 
*)desc.data; + u16 tqp_type_and_id; + u8 int_gl_index; + int ret; + + req->msg.ring_num = HCLGE_LIMIT_RING_NUM; + + memset(&ring_chain, 0, sizeof(ring_chain)); + ret = hclge_get_ring_chain_from_mbx(req, &ring_chain, vport); + if (ret) + return ret; + + ret = hclge_query_ring_vector_map(vport, &ring_chain, &desc); + if (ret) { + hclge_free_vector_ring_chain(&ring_chain); + return ret; + } + + tqp_type_and_id = le16_to_cpu(data->tqp_type_and_id[0]); + int_gl_index = hnae3_get_field(tqp_type_and_id, + HCLGE_INT_GL_IDX_M, HCLGE_INT_GL_IDX_S); + + resp->data[HCLGE_RING_TYPE_OFFSET] = req->msg.param[0].ring_type; + resp->data[HCLGE_TQP_INDEX_OFFSET] = req->msg.param[0].tqp_index; + resp->data[HCLGE_INT_GL_INDEX_OFFSET] = int_gl_index; + resp->data[HCLGE_VECTOR_ID_OFFSET] = data->int_vector_id_l; + resp->len = HCLGE_RING_VECTOR_MAP_INFO_LEN; + + hclge_free_vector_ring_chain(&ring_chain); + + return ret; +} + static void hclge_set_vf_promisc_mode(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *req) { @@ -670,7 +745,7 @@ static bool hclge_cmd_crq_empty(struct hclge_hw *hw) { u32 tail = hclge_read_dev(hw, HCLGE_NIC_CRQ_TAIL_REG); - return tail == hw->cmq.crq.next_to_use; + return tail == hw->hw.cmq.crq.next_to_use; } static void hclge_handle_ncsi_error(struct hclge_dev *hdev) @@ -699,20 +774,289 @@ static void hclge_handle_vf_tbl(struct hclge_vport *vport, } } +static int +hclge_mbx_map_ring_to_vector_handler(struct hclge_mbx_ops_param *param) +{ + return hclge_map_unmap_ring_to_vf_vector(param->vport, true, + param->req); +} + +static int +hclge_mbx_unmap_ring_to_vector_handler(struct hclge_mbx_ops_param *param) +{ + return hclge_map_unmap_ring_to_vf_vector(param->vport, false, + param->req); +} + +static int +hclge_mbx_get_ring_vector_map_handler(struct hclge_mbx_ops_param *param) +{ + int ret; + + ret = hclge_get_vf_ring_vector_map(param->vport, param->req, + param->resp_msg); + if (ret) + dev_err(&param->vport->back->pdev->dev, + "PF fail(%d) to get VF ring vector
map\n", + ret); + return ret; +} + +static int hclge_mbx_set_promisc_mode_handler(struct hclge_mbx_ops_param *param) +{ + hclge_set_vf_promisc_mode(param->vport, param->req); + return 0; +} + +static int hclge_mbx_set_unicast_handler(struct hclge_mbx_ops_param *param) +{ + int ret; + + ret = hclge_set_vf_uc_mac_addr(param->vport, param->req); + if (ret) + dev_err(&param->vport->back->pdev->dev, + "PF fail(%d) to set VF UC MAC Addr\n", + ret); + return ret; +} + +static int hclge_mbx_set_multicast_handler(struct hclge_mbx_ops_param *param) +{ + int ret; + + ret = hclge_set_vf_mc_mac_addr(param->vport, param->req); + if (ret) + dev_err(&param->vport->back->pdev->dev, + "PF fail(%d) to set VF MC MAC Addr\n", + ret); + return ret; +} + +static int hclge_mbx_set_vlan_handler(struct hclge_mbx_ops_param *param) +{ + int ret; + + ret = hclge_set_vf_vlan_cfg(param->vport, param->req, param->resp_msg); + if (ret) + dev_err(&param->vport->back->pdev->dev, + "PF failed(%d) to config VF's VLAN\n", + ret); + return ret; +} + +static int hclge_mbx_set_alive_handler(struct hclge_mbx_ops_param *param) +{ + int ret; + + ret = hclge_set_vf_alive(param->vport, param->req); + if (ret) + dev_err(&param->vport->back->pdev->dev, + "PF failed(%d) to set VF's ALIVE\n", + ret); + return ret; +} + +static int hclge_mbx_get_qinfo_handler(struct hclge_mbx_ops_param *param) +{ + hclge_get_vf_queue_info(param->vport, param->resp_msg); + return 0; +} + +static int hclge_mbx_get_qdepth_handler(struct hclge_mbx_ops_param *param) +{ + hclge_get_vf_queue_depth(param->vport, param->resp_msg); + return 0; +} + +static int hclge_mbx_get_basic_info_handler(struct hclge_mbx_ops_param *param) +{ + hclge_get_basic_info(param->vport, param->resp_msg); + return 0; +} + +static int hclge_mbx_get_link_status_handler(struct hclge_mbx_ops_param *param) +{ + int ret; + + ret = hclge_push_vf_link_status(param->vport); + if (ret) + dev_err(&param->vport->back->pdev->dev, + "failed to inform link stat to VF, ret = %d\n", + ret); + return
ret; +} + +static int hclge_mbx_queue_reset_handler(struct hclge_mbx_ops_param *param) +{ + return hclge_mbx_reset_vf_queue(param->vport, param->req, + param->resp_msg); +} + +static int hclge_mbx_reset_handler(struct hclge_mbx_ops_param *param) +{ + return hclge_reset_vf(param->vport); +} + +static int hclge_mbx_keep_alive_handler(struct hclge_mbx_ops_param *param) +{ + hclge_vf_keep_alive(param->vport); + return 0; +} + +static int hclge_mbx_set_mtu_handler(struct hclge_mbx_ops_param *param) +{ + int ret; + + ret = hclge_set_vf_mtu(param->vport, param->req); + if (ret) + dev_err(&param->vport->back->pdev->dev, + "VF fail(%d) to set mtu\n", ret); + return ret; +} + +static int hclge_mbx_get_qid_in_pf_handler(struct hclge_mbx_ops_param *param) +{ + return hclge_get_queue_id_in_pf(param->vport, param->req, + param->resp_msg); +} + +static int hclge_mbx_get_rss_key_handler(struct hclge_mbx_ops_param *param) +{ + return hclge_get_rss_key(param->vport, param->req, param->resp_msg); +} + +static int hclge_mbx_get_link_mode_handler(struct hclge_mbx_ops_param *param) +{ + hclge_get_link_mode(param->vport, param->req); + return 0; +} + +static int +hclge_mbx_get_vf_flr_status_handler(struct hclge_mbx_ops_param *param) +{ + hclge_rm_vport_all_mac_table(param->vport, false, + HCLGE_MAC_ADDR_UC); + hclge_rm_vport_all_mac_table(param->vport, false, + HCLGE_MAC_ADDR_MC); + hclge_rm_vport_all_vlan_table(param->vport, false); + return 0; +} + +static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param) +{ + hclge_rm_vport_all_mac_table(param->vport, true, + HCLGE_MAC_ADDR_UC); + hclge_rm_vport_all_mac_table(param->vport, true, + HCLGE_MAC_ADDR_MC); + hclge_rm_vport_all_vlan_table(param->vport, true); + return 0; +} + +static int hclge_mbx_get_media_type_handler(struct hclge_mbx_ops_param *param) +{ + hclge_get_vf_media_type(param->vport, param->resp_msg); + return 0; +} + +static int hclge_mbx_push_link_status_handler(struct hclge_mbx_ops_param *param) +{ + 
hclge_handle_link_change_event(param->vport->back, param->req); + return 0; +} + +static int hclge_mbx_get_mac_addr_handler(struct hclge_mbx_ops_param *param) +{ + hclge_get_vf_mac_addr(param->vport, param->resp_msg); + return 0; +} + +static int hclge_mbx_ncsi_error_handler(struct hclge_mbx_ops_param *param) +{ + hclge_handle_ncsi_error(param->vport->back); + return 0; +} + +static int hclge_mbx_handle_vf_tbl_handler(struct hclge_mbx_ops_param *param) +{ + hclge_handle_vf_tbl(param->vport, param->req); + return 0; +} + +static const hclge_mbx_ops_fn hclge_mbx_ops_list[HCLGE_MBX_OPCODE_MAX] = { + [HCLGE_MBX_RESET] = hclge_mbx_reset_handler, + [HCLGE_MBX_SET_UNICAST] = hclge_mbx_set_unicast_handler, + [HCLGE_MBX_SET_MULTICAST] = hclge_mbx_set_multicast_handler, + [HCLGE_MBX_SET_VLAN] = hclge_mbx_set_vlan_handler, + [HCLGE_MBX_MAP_RING_TO_VECTOR] = hclge_mbx_map_ring_to_vector_handler, + [HCLGE_MBX_UNMAP_RING_TO_VECTOR] = hclge_mbx_unmap_ring_to_vector_handler, + [HCLGE_MBX_SET_PROMISC_MODE] = hclge_mbx_set_promisc_mode_handler, + [HCLGE_MBX_GET_QINFO] = hclge_mbx_get_qinfo_handler, + [HCLGE_MBX_GET_QDEPTH] = hclge_mbx_get_qdepth_handler, + [HCLGE_MBX_GET_BASIC_INFO] = hclge_mbx_get_basic_info_handler, + [HCLGE_MBX_GET_RSS_KEY] = hclge_mbx_get_rss_key_handler, + [HCLGE_MBX_GET_MAC_ADDR] = hclge_mbx_get_mac_addr_handler, + [HCLGE_MBX_GET_LINK_STATUS] = hclge_mbx_get_link_status_handler, + [HCLGE_MBX_QUEUE_RESET] = hclge_mbx_queue_reset_handler, + [HCLGE_MBX_KEEP_ALIVE] = hclge_mbx_keep_alive_handler, + [HCLGE_MBX_SET_ALIVE] = hclge_mbx_set_alive_handler, + [HCLGE_MBX_SET_MTU] = hclge_mbx_set_mtu_handler, + [HCLGE_MBX_GET_QID_IN_PF] = hclge_mbx_get_qid_in_pf_handler, + [HCLGE_MBX_GET_LINK_MODE] = hclge_mbx_get_link_mode_handler, + [HCLGE_MBX_GET_MEDIA_TYPE] = hclge_mbx_get_media_type_handler, + [HCLGE_MBX_VF_UNINIT] = hclge_mbx_vf_uninit_handler, + [HCLGE_MBX_HANDLE_VF_TBL] = hclge_mbx_handle_vf_tbl_handler, + [HCLGE_MBX_GET_RING_VECTOR_MAP] = 
hclge_mbx_get_ring_vector_map_handler, + [HCLGE_MBX_GET_VF_FLR_STATUS] = hclge_mbx_get_vf_flr_status_handler, + [HCLGE_MBX_PUSH_LINK_STATUS] = hclge_mbx_push_link_status_handler, + [HCLGE_MBX_NCSI_ERROR] = hclge_mbx_ncsi_error_handler, +}; + +static void hclge_mbx_request_handling(struct hclge_mbx_ops_param *param) +{ + hclge_mbx_ops_fn cmd_func = NULL; + struct hclge_dev *hdev; + int ret = 0; + + hdev = param->vport->back; + cmd_func = hclge_mbx_ops_list[param->req->msg.code]; + if (!cmd_func) { + dev_err(&hdev->pdev->dev, + "un-supported mailbox message, code = %u\n", + param->req->msg.code); + return; + } + ret = cmd_func(param); + + /* PF driver should not reply IMP */ + if (hnae3_get_bit(param->req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B) && + param->req->msg.code < HCLGE_MBX_GET_VF_FLR_STATUS) { + param->resp_msg->status = ret; + if (time_is_before_jiffies(hdev->last_mbx_scheduled + + HCLGE_MBX_SCHED_TIMEOUT)) + dev_warn(&hdev->pdev->dev, + "resp vport%u mbx(%u,%u) late\n", + param->req->mbx_src_vfid, + param->req->msg.code, + param->req->msg.subcode); + + hclge_gen_resp_to_vf(param->vport, param->req, param->resp_msg); + } +} + void hclge_mbx_handler(struct hclge_dev *hdev) { - struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq; + struct hclge_comm_cmq_ring *crq = &hdev->hw.hw.cmq.crq; struct hclge_respond_to_vf_msg resp_msg; struct hclge_mbx_vf_to_pf_cmd *req; - struct hclge_vport *vport; + struct hclge_mbx_ops_param param; struct hclge_desc *desc; - bool is_del = false; unsigned int flag; - int ret = 0; + param.resp_msg = &resp_msg; /* handle all the mailbox requests in the queue */ while (!hclge_cmd_crq_empty(&hdev->hw)) { - if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { + if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, + &hdev->hw.hw.comm_state)) { dev_warn(&hdev->pdev->dev, "command queue needs re-initializing\n"); return; @@ -733,136 +1077,16 @@ void hclge_mbx_handler(struct hclge_dev *hdev) continue; } - vport = &hdev->vport[req->mbx_src_vfid]; - 
trace_hclge_pf_mbx_get(hdev, req); /* clear the resp_msg before processing every mailbox message */ memset(&resp_msg, 0, sizeof(resp_msg)); - - switch (req->msg.code) { - case HCLGE_MBX_MAP_RING_TO_VECTOR: - ret = hclge_map_unmap_ring_to_vf_vector(vport, true, - req); - break; - case HCLGE_MBX_UNMAP_RING_TO_VECTOR: - ret = hclge_map_unmap_ring_to_vf_vector(vport, false, - req); - break; - case HCLGE_MBX_SET_PROMISC_MODE: - hclge_set_vf_promisc_mode(vport, req); - break; - case HCLGE_MBX_SET_UNICAST: - ret = hclge_set_vf_uc_mac_addr(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "PF fail(%d) to set VF UC MAC Addr\n", - ret); - break; - case HCLGE_MBX_SET_MULTICAST: - ret = hclge_set_vf_mc_mac_addr(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "PF fail(%d) to set VF MC MAC Addr\n", - ret); - break; - case HCLGE_MBX_SET_VLAN: - ret = hclge_set_vf_vlan_cfg(vport, req, &resp_msg); - if (ret) - dev_err(&hdev->pdev->dev, - "PF failed(%d) to config VF's VLAN\n", - ret); - break; - case HCLGE_MBX_SET_ALIVE: - ret = hclge_set_vf_alive(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "PF failed(%d) to set VF's ALIVE\n", - ret); - break; - case HCLGE_MBX_GET_QINFO: - hclge_get_vf_queue_info(vport, &resp_msg); - break; - case HCLGE_MBX_GET_QDEPTH: - hclge_get_vf_queue_depth(vport, &resp_msg); - break; - case HCLGE_MBX_GET_BASIC_INFO: - hclge_get_basic_info(vport, &resp_msg); - break; - case HCLGE_MBX_GET_LINK_STATUS: - ret = hclge_push_vf_link_status(vport); - if (ret) - dev_err(&hdev->pdev->dev, - "failed to inform link stat to VF, ret = %d\n", - ret); - break; - case HCLGE_MBX_QUEUE_RESET: - ret = hclge_mbx_reset_vf_queue(vport, req, &resp_msg); - break; - case HCLGE_MBX_RESET: - ret = hclge_reset_vf(vport); - break; - case HCLGE_MBX_KEEP_ALIVE: - hclge_vf_keep_alive(vport); - break; - case HCLGE_MBX_SET_MTU: - ret = hclge_set_vf_mtu(vport, req); - if (ret) - dev_err(&hdev->pdev->dev, - "VF fail(%d) to set mtu\n", ret); - break; - case 
HCLGE_MBX_GET_QID_IN_PF: - ret = hclge_get_queue_id_in_pf(vport, req, &resp_msg); - break; - case HCLGE_MBX_GET_RSS_KEY: - ret = hclge_get_rss_key(vport, req, &resp_msg); - break; - case HCLGE_MBX_GET_LINK_MODE: - hclge_get_link_mode(vport, req); - break; - case HCLGE_MBX_GET_VF_FLR_STATUS: - case HCLGE_MBX_VF_UNINIT: - is_del = req->msg.code == HCLGE_MBX_VF_UNINIT; - hclge_rm_vport_all_mac_table(vport, is_del, - HCLGE_MAC_ADDR_UC); - hclge_rm_vport_all_mac_table(vport, is_del, - HCLGE_MAC_ADDR_MC); - hclge_rm_vport_all_vlan_table(vport, is_del); - break; - case HCLGE_MBX_GET_MEDIA_TYPE: - hclge_get_vf_media_type(vport, &resp_msg); - break; - case HCLGE_MBX_PUSH_LINK_STATUS: - hclge_handle_link_change_event(hdev, req); - break; - case HCLGE_MBX_GET_MAC_ADDR: - hclge_get_vf_mac_addr(vport, &resp_msg); - break; - case HCLGE_MBX_NCSI_ERROR: - hclge_handle_ncsi_error(hdev); - break; - case HCLGE_MBX_HANDLE_VF_TBL: - hclge_handle_vf_tbl(vport, req); - break; - default: - dev_err(&hdev->pdev->dev, - "un-supported mailbox message, code = %u\n", - req->msg.code); - break; - } - - /* PF driver should not reply IMP */ - if (hnae3_get_bit(req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B) && - req->msg.code < HCLGE_MBX_GET_VF_FLR_STATUS) { - resp_msg.status = ret; - hclge_gen_resp_to_vf(vport, req, &resp_msg); - } + param.vport = &hdev->vport[req->mbx_src_vfid]; + param.req = req; + hclge_mbx_request_handling(&param); crq->desc[crq->next_to_use].flag = 0; hclge_mbx_ring_ptr_move_crq(crq); - - /* reinitialize ret after complete the mbx message processing */ - ret = 0; } /* Write back CMDQ_RQ header pointer, M7 need this pointer */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 1231c34f09..63d2be4349 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -47,7 +47,7 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, 
int regnum, struct hclge_desc desc; int ret; - if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) + if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) return 0; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false); @@ -85,7 +85,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum) struct hclge_desc desc; int ret; - if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) + if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) return 0; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index dd0750f6da..0f06f95b09 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -464,7 +464,7 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) } spin_lock_init(&ptp->lock); - ptp->io_base = hdev->hw.io_base + HCLGE_PTP_REG_OFFSET; + ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; hdev->ptp = ptp; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h index 8510b88d49..f3cd5a376e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h @@ -24,7 +24,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_GET_MBX_LEN) ), @@ -33,7 +33,7 @@ TRACE_EVENT(hclge_pf_mbx_get, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + 
__assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), @@ -56,7 +56,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->vport[0].nic.kinfo.netdev->name) + __string(devname, hdev->vport[0].nic.kinfo.netdev->name) __array(u32, mbx_data, PF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_pf_mbx_send, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name); + __assign_str(devname, hdev->vport[0].nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile deleted file mode 100644 index 51ff7d86ee..0000000000 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0+ -# -# Makefile for the HISILICON network device drivers. 
-# - -ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 -ccflags-y += -I $(srctree)/$(src) - -obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o -hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o hclgevf_devlink.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index bd8468c2d9..a41e04796b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -537,7 +537,8 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) nic->ae_algo = &ae_algovf; nic->pdev = hdev->pdev; - nic->numa_node_mask = hdev->numa_node_mask; + bitmap_copy(nic->numa_node_mask.bits, hdev->numa_node_mask.bits, + MAX_NUMNODES); nic->flags |= HNAE3_SUPPORT_VF; nic->kinfo.io_base = hdev->hw.io_base; @@ -2588,8 +2589,8 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev) roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; - roce->numa_node_mask = nic->numa_node_mask; - + bitmap_copy(roce->numa_node_mask.bits, nic->numa_node_mask.bits, + MAX_NUMNODES); return 0; } @@ -2721,8 +2722,7 @@ static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable) } else { set_bit(HCLGEVF_STATE_DOWN, &hdev->state); - /* flush memory to make sure DOWN is seen by service task */ - smp_mb__before_atomic(); + smp_mb__after_atomic(); /* flush memory to make sure DOWN is seen by service task */ hclgevf_flush_link_update(hdev); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 5c7538ca36..2b216ac969 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -298,7 +298,7 @@ struct hclgevf_dev { u16 rss_size_max; /* HW defined max RSS task queue */ u16 num_alloc_vport; /* num vports this driver supports */ - u32 numa_node_mask; + nodemask_t 
numa_node_mask; u16 rx_buf_len; u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h index 5d4895bb57..b259e95dd5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h @@ -23,7 +23,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __field(u8, vfid) __field(u16, code) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_GET_MBX_LEN) ), @@ -31,7 +31,7 @@ TRACE_EVENT(hclge_vf_mbx_get, __entry->vfid = req->dest_vfid; __entry->code = le16_to_cpu(req->msg.code); __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_pf_to_vf_cmd)); ), @@ -55,7 +55,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __field(u8, code) __field(u8, subcode) __string(pciname, pci_name(hdev->pdev)) - __string(devname, &hdev->nic.kinfo.netdev->name) + __string(devname, hdev->nic.kinfo.netdev->name) __array(u32, mbx_data, VF_SEND_MBX_LEN) ), @@ -64,7 +64,7 @@ TRACE_EVENT(hclge_vf_mbx_send, __entry->code = req->msg.code; __entry->subcode = req->msg.subcode; __assign_str(pciname, pci_name(hdev->pdev)); - __assign_str(devname, &hdev->nic.kinfo.netdev->name); + __assign_str(devname, hdev->nic.kinfo.netdev->name); memcpy(__entry->mbx_data, req, sizeof(struct hclge_mbx_vf_to_pf_cmd)); ), diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 453a854106..a05103e2fb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -108,7 +108,7 @@ #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ /* driver state flags */ 
-enum i40e_state_t { +enum i40e_state { __I40E_TESTING, __I40E_CONFIG_BUSY, __I40E_CONFIG_DONE, @@ -156,7 +156,7 @@ enum i40e_state_t { BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* VSI state flags */ -enum i40e_vsi_state_t { +enum i40e_vsi_state { __I40E_VSI_DOWN, __I40E_VSI_NEEDS_RESTART, __I40E_VSI_SYNCING_FILTERS, @@ -973,6 +973,8 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + bool in_busy_poll; + int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; /* lan device */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 7b522d55f3..991321b040 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1231,8 +1231,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) int bkt; int cnt = 0; - hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) - ++cnt; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_ACTIVE) + ++cnt; + } return cnt; } @@ -3868,6 +3871,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; + /* Set ITR for software interrupts triggered after exiting + * busy-loop polling. 
+ */ + wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), + I40E_ITR_20K); + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); @@ -4130,6 +4139,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) } /* register for affinity change notifications */ + q_vector->irq_num = irq_num; q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); @@ -16137,8 +16147,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) val = (rd32(&pf->hw, I40E_PRTGL_SAH) & I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - pf->hw.port, val); + dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", + pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out @@ -16680,7 +16690,7 @@ static int __init i40e_init_module(void) * since we need to be able to guarantee forward progress even under * memory pressure. 
*/ - i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name); + i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 09b1d5aed1..38942d3f78 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -35,7 +35,7 @@ enum i40e_ptp_pin { GPIO_4 }; -enum i40e_can_set_pins_t { +enum i40e_can_set_pins { CANT_DO_PINS = -1, CAN_SET_PINS, CAN_DO_PINS @@ -193,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) * return CAN_DO_PINS if pins can be manipulated within a NIC or * return CANT_DO_PINS otherwise. **/ -static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) +static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf) { if (!i40e_is_ptp_pin_dev(&pf->hw)) { dev_warn(&pf->pdev->dev, @@ -1089,7 +1089,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) static int i40e_ptp_set_pins(struct i40e_pf *pf, struct i40e_ptp_pins_settings *pins) { - enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); + enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf); int i = 0; if (pin_caps == CANT_DO_PINS) diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 7339003aa1..694cb3e45c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -328,8 +328,11 @@ #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 +#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, 
I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index cf8c3d480a..e2737875e3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2560,7 +2560,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static inline u32 i40e_buildreg_itr(const int type, u16 itr) +/** + * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register + * @itr_idx: interrupt throttling index + * @interval: interrupt throttling interval value in usecs + * @force_swint: force software interrupt + * + * The function builds a value for I40E_PFINT_DYN_CTLN register that + * is used to update interrupt throttling interval for specified ITR index + * and optionally enforces a software interrupt. If the @itr_idx is equal + * to I40E_ITR_NONE then no interval change is applied and only @force_swint + * parameter is taken into account. If the interval change and enforced + * software interrupt are not requested then the built value just enables + * appropriate vector interrupt. + **/ +static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, + bool force_swint) { u32 val; @@ -2574,23 +2589,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) * an event in the PBA anyway so we need to rely on the automask * to hold pending events for us until the interrupt is re-enabled * - * The itr value is reported in microseconds, and the register - * value is recorded in 2 microsecond units. 
For this reason we - * only need to shift by the interval shift - 1 instead of the - * full value. + * We have to shift the given value as it is reported in microseconds + * and the register value is recorded in 2 microsecond units. */ - itr &= I40E_ITR_MASK; + interval >>= 1; + /* 1. Enable vector interrupt + * 2. Update the interval for the specified ITR index + * (I40E_ITR_NONE in the register is used to indicate that + * no interval update is requested) + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); + FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | + FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); + + /* 3. Enforce software interrupt trigger if requested + * (These software interrupts rate is limited by ITR2 that is + * set to 20K interrupts per second) + */ + if (force_swint) + val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | + FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, + I40E_SW_ITR); return val; } -/* a small macro to shorten up some long lines */ -#define INTREG I40E_PFINT_DYN_CTLN - /* The act of updating the ITR will cause it to immediately trigger. In order * to prevent this from throwing off adaptive update statistics we defer the * update so that it can only happen so often. 
So after either Tx or Rx are @@ -2609,8 +2634,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { + enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; struct i40e_hw *hw = &vsi->back->hw; - u32 intval; + u16 interval = 0; + u32 itr_val; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { @@ -2632,8 +2659,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ if (q_vector->rx.target_itr < q_vector->rx.current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || @@ -2642,25 +2669,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - intval = i40e_buildreg_itr(I40E_TX_ITR, - q_vector->tx.target_itr); + itr_idx = I40E_TX_ITR; + interval = q_vector->tx.target_itr; q_vector->tx.current_itr = q_vector->tx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { /* Rx ITR needs to be increased, third priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* No ITR update, lowest priority */ - intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown) q_vector->itr_countdown--; } - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), intval); + 
/* Do not update interrupt control register if VSI is down */ + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + /* Update ITR interval if necessary and enforce software interrupt + * if we are exiting busy poll. + */ + if (q_vector->in_busy_poll) { + itr_val = i40e_buildreg_itr(itr_idx, interval, true); + q_vector->in_busy_poll = false; + } else { + itr_val = i40e_buildreg_itr(itr_idx, interval, false); + } + wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); } /** @@ -2767,6 +2805,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) */ if (likely(napi_complete_done(napi, work_done))) i40e_update_enable_itr(vsi, q_vector); + else + q_vector->in_busy_poll = true; return min(work_done, budget - 1); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index f3b0b81517..054b7d1632 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -57,7 +57,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl) * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any * register but instead is a special value meaning "don't update" ITR0/1/2. 
*/ -enum i40e_dyn_idx_t { +enum i40e_dyn_idx { I40E_IDX_ITR0 = 0, I40E_IDX_ITR1 = 1, I40E_IDX_ITR2 = 2, @@ -67,6 +67,7 @@ enum i40e_dyn_idx_t { /* these are indexes into ITRN registers */ #define I40E_RX_ITR I40E_IDX_ITR0 #define I40E_TX_ITR I40E_IDX_ITR1 +#define I40E_SW_ITR I40E_IDX_ITR2 /* Supported RSS offloads */ #define I40E_DEFAULT_RSS_HENA ( \ @@ -301,7 +302,7 @@ struct i40e_rx_queue_stats { u64 realloc_count; }; -enum i40e_ring_state_t { +enum i40e_ring_state { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, __I40E_RING_STATE_NBITS /* must be last */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 9ff8bf346b..d1635df17e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1626,8 +1626,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; - int i, v; u32 reg; + int i; /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) @@ -1638,11 +1638,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) { - vf = &pf->vf[v]; + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is being reset no need to trigger reset again */ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - i40e_trigger_vf_reset(&pf->vf[v], flr); + i40e_trigger_vf_reset(vf, flr); } /* HW requires some time to make sure it can flush the FIFO for a VF @@ -1651,14 +1650,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) * the VFs using a simple iterator that increments once that VF has * finished resetting. 
*/ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { + for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { usleep_range(10000, 20000); /* Check each VF in sequence, beginning with the VF to fail * the previous check. */ - while (v < pf->num_alloc_vfs) { - vf = &pf->vf[v]; + while (vf < &pf->vf[pf->num_alloc_vfs]) { if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) @@ -1668,7 +1666,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If the current VF has finished resetting, move on * to the next VF in sequence. */ - v++; + ++vf; } } @@ -1678,39 +1676,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* Display a warning if at least one VF didn't manage to reset in * time, but continue on with the operation. */ - if (v < pf->num_alloc_vfs) + if (vf < &pf->vf[pf->num_alloc_vfs]) dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - pf->vf[v].vf_id); + vf->vf_id); usleep_range(10000, 20000); /* Begin disabling all the rings associated with VFs, but do not wait * between each VF. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); } /* Now that we've notified HW to disable all of the VF rings, wait * until they finish. 
*/ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); } /* Hw may need up to 50ms to finish disabling the RX queues. We @@ -1719,12 +1717,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) mdelay(50); /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_cleanup_reset_vf(&pf->vf[v]); + i40e_cleanup_reset_vf(vf); } i40e_flush(hw); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 41b8ff0d4d..6073dcc414 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2901,6 +2901,34 @@ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) spin_unlock_bh(&adapter->cloud_filter_list_lock); } +/** + * iavf_is_tc_config_same - Compare the mqprio TC config with the + * TC config already configured on this adapter. + * @adapter: board private structure + * @mqprio_qopt: TC config received from kernel. + * + * This function compares the TC config received from the kernel + * with the config already configured on the adapter. + * + * Return: True if configuration is same, false otherwise. 
+ **/ +static bool iavf_is_tc_config_same(struct iavf_adapter *adapter, + struct tc_mqprio_qopt *mqprio_qopt) +{ + struct virtchnl_channel_info *ch = &adapter->ch_config.ch_info[0]; + int i; + + if (adapter->num_tc != mqprio_qopt->num_tc) + return false; + + for (i = 0; i < adapter->num_tc; i++) { + if (ch[i].count != mqprio_qopt->count[i] || + ch[i].offset != mqprio_qopt->offset[i]) + return false; + } + return true; +} + /** * __iavf_setup_tc - configure multiple traffic classes * @netdev: network interface device structure @@ -2958,7 +2986,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) if (ret) return ret; /* Return if same TC config is requested */ - if (adapter->num_tc == num_tc) + if (iavf_is_tc_config_same(adapter, &mqprio_qopt->qopt)) return 0; adapter->num_tc = num_tc; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 60f73e775b..2440c82ea1 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3394,7 +3394,6 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) struct ice_pf *pf = vsi->back; int new_rx = 0, new_tx = 0; bool locked = false; - u32 curr_combined; int ret = 0; /* do not support changing channels in Safe Mode */ @@ -3411,22 +3410,8 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) return -EOPNOTSUPP; } - curr_combined = ice_get_combined_cnt(vsi); - - /* these checks are for cases where user didn't specify a particular - * value on cmd line but we get non-zero value anyway via - * get_channels(); look at ethtool.c in ethtool repository (the user - * space part), particularly, do_schannels() routine - */ - if (ch->rx_count == vsi->num_rxq - curr_combined) - ch->rx_count = 0; - if (ch->tx_count == vsi->num_txq - curr_combined) - ch->tx_count = 0; - if (ch->combined_count == curr_combined) - ch->combined_count = 0; - - if 
(!(ch->combined_count || (ch->rx_count && ch->tx_count))) { - netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n"); + if (ch->rx_count && ch->tx_count) { + netdev_err(dev, "Dedicated RX or TX channels cannot be used simultaneously\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 69d11ff767..ac198c00b4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -909,7 +909,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) goto err_out; } - xs = kzalloc(sizeof(*xs), GFP_KERNEL); + algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); + if (unlikely(!algo)) { + err = -ENOENT; + goto err_out; + } + + xs = kzalloc(sizeof(*xs), GFP_ATOMIC); if (unlikely(!xs)) { err = -ENOMEM; goto err_out; @@ -925,14 +931,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); xs->xso.dev = adapter->netdev; - algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); - if (unlikely(!algo)) { - err = -ENOENT; - goto err_xs; - } - aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; - xs->aead = kzalloc(aead_len, GFP_KERNEL); + xs->aead = kzalloc(aead_len, GFP_ATOMIC); if (unlikely(!xs->aead)) { err = -ENOMEM; goto err_xs; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 3ade1a6e2f..4dec201158 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -787,6 +787,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? 
CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 4dddf6ec3b..e201827529 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -559,12 +559,10 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, u16 pcifunc; int ret, lf; - cmd_buf = memdup_user(buffer, count + 1); + cmd_buf = memdup_user_nul(buffer, count); if (IS_ERR(cmd_buf)) return -ENOMEM; - cmd_buf[count] = '\0'; - cmd_buf_tmp = strchr(cmd_buf, '\n'); if (cmd_buf_tmp) { *cmd_buf_tmp = '\0'; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index bda93e550b..34a9a9164f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4184,18 +4184,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) */ rvu_write64(rvu, blkaddr, NIX_AF_CFG, rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL); + } - /* Set chan/link to backpressure TL3 instead of TL2 */ - rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); + /* Set chan/link to backpressure TL3 instead of TL2 */ + rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); - /* Disable SQ manager's sticky mode operation (set TM6 = 0) - * This sticky mode is known to cause SQ stalls when multiple - * SQs are mapped to same SMQ and transmitting pkts at a time. 
- */ - cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); - cfg &= ~BIT_ULL(15); - rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); - } + /* Disable SQ manager's sticky mode operation (set TM6 = 0) + * This sticky mode is known to cause SQ stalls when multiple + * SQs are mapped to same SMQ and transmitting pkts at a time. + */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); + cfg &= ~BIT_ULL(15); + rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); ltdefs = rvu->kpu.lt_def; /* Calibrate X2P bus to check if CGX/LBK links are fine */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 604aaa9b96..c6b6d709e5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1622,7 +1622,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, struct npc_coalesced_kpu_prfl *img_data = NULL; int i = 0, rc = -EINVAL; void __iomem *kpu_prfl_addr; - u16 offset; + u32 offset; img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; if (le64_to_cpu(img_data->signature) == KPU_SIGN && diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 167b926196..5f093b34db 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1795,7 +1795,7 @@ int otx2_open(struct net_device *netdev) * mcam entries are enabled to receive the packets. Hence disable the * packet I/O. 
*/ - if (err == EIO) + if (err == -EIO) goto err_disable_rxtx; else if (err) goto err_tx_stop_queues; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 41c15a65fb..81e517dbe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -114,15 +114,18 @@ static u8 alloc_token(struct mlx5_cmd *cmd) return token; } -static int cmd_alloc_index(struct mlx5_cmd *cmd) +static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent) { unsigned long flags; int ret; spin_lock_irqsave(&cmd->alloc_lock, flags); ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds); - if (ret < cmd->max_reg_cmds) + if (ret < cmd->max_reg_cmds) { clear_bit(ret, &cmd->bitmask); + ent->idx = ret; + cmd->ent_arr[ent->idx] = ent; + } spin_unlock_irqrestore(&cmd->alloc_lock, flags); return ret < cmd->max_reg_cmds ? ret : -ENOMEM; @@ -924,7 +927,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? 
&cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index(cmd); + alloc_ret = cmd_alloc_index(cmd, ent); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -939,15 +942,14 @@ static void cmd_work_handler(struct work_struct *work) up(sem); return; } - ent->idx = alloc_ret; } else { ent->idx = cmd->max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); clear_bit(ent->idx, &cmd->bitmask); + cmd->ent_arr[ent->idx] = ent; spin_unlock_irqrestore(&cmd->alloc_lock, flags); } - cmd->ent_arr[ent->idx] = ent; lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); @@ -1533,6 +1535,9 @@ static int cmd_comp_notifier(struct notifier_block *nb, dev = container_of(cmd, struct mlx5_core_dev, cmd); eqe = data; + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return NOTIFY_DONE; + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); return NOTIFY_OK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 428881e0ad..6621f6cd43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -105,18 +105,11 @@ mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) if (!x || !x->xso.offload_handle) goto out_disable; - if (xo->inner_ipproto) { - /* Cannot support tunnel packet over IPsec tunnel mode - * because we cannot offload three IP header csum - */ - if (x->props.mode == XFRM_MODE_TUNNEL) - goto out_disable; - - /* Only support UDP or TCP L4 checksum */ - if (xo->inner_ipproto != IPPROTO_UDP && - xo->inner_ipproto != IPPROTO_TCP) - goto out_disable; - } + /* Only support UDP or TCP L4 checksum */ + if (xo->inner_ipproto && + xo->inner_ipproto != IPPROTO_UDP && + xo->inner_ipproto != IPPROTO_TCP) + goto out_disable; return features; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 923be5fb7d..79d687c663 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3185,7 +3185,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) mlx5e_fold_sw_stats64(priv, stats); } - stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; + stats->rx_missed_errors = priv->stats.qcnt.rx_out_of_buffer; stats->rx_length_errors = PPORT_802_3_GET(pstats, a_in_range_length_errors) + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 161ad2ae40..a55cacb988 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1682,8 +1682,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } trace_mlx5_fs_set_fte(fte, false); + /* Link newly added rules into the tree. 
*/ for (i = 0; i < handle->num_rules; i++) { - if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + if (!handle->rule[i]->node.parent) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 679415a64f..c644ee78e0 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "mlxbf_gige.h" @@ -141,13 +142,10 @@ static int mlxbf_gige_open(struct net_device *netdev) control |= MLXBF_GIGE_CONTROL_PORT_EN; writeq(control, priv->base + MLXBF_GIGE_CONTROL); - err = mlxbf_gige_request_irqs(priv); - if (err) - return err; mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); if (err) - goto free_irqs; + return err; /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -159,7 +157,7 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_tx_init(priv); if (err) - goto free_irqs; + goto phy_deinit; err = mlxbf_gige_rx_init(priv); if (err) goto tx_deinit; @@ -168,6 +166,10 @@ static int mlxbf_gige_open(struct net_device *netdev) napi_enable(&priv->napi); netif_start_queue(netdev); + err = mlxbf_gige_request_irqs(priv); + if (err) + goto napi_deinit; + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -184,11 +186,17 @@ static int mlxbf_gige_open(struct net_device *netdev) return 0; +napi_deinit: + netif_stop_queue(netdev); + napi_disable(&priv->napi); + netif_napi_del(&priv->napi); + mlxbf_gige_rx_deinit(priv); + tx_deinit: mlxbf_gige_tx_deinit(priv); -free_irqs: - mlxbf_gige_free_irqs(priv); +phy_deinit: + phy_stop(phydev); return err; } @@ -412,8 +420,13 @@ static void 
mlxbf_gige_shutdown(struct platform_device *pdev) { struct mlxbf_gige *priv = platform_get_drvdata(pdev); - writeq(0, priv->base + MLXBF_GIGE_INT_EN); - mlxbf_gige_clean_port(priv); + rtnl_lock(); + netif_device_detach(priv->netdev); + + if (netif_running(priv->netdev)) + dev_close(priv->netdev); + + rtnl_unlock(); } static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f080fab3de..d1102d5613 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -746,7 +746,7 @@ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port, static const struct mlxsw_listener mlxsw_emad_rx_listener = MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false, - EMAD, DISCARD); + EMAD, FORWARD); static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 483c8b75be..46b1120a81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -713,7 +713,9 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) rehash.dw.work); int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS; + mutex_lock(&vregion->lock); mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits); + mutex_unlock(&vregion->lock); if (credits < 0) /* Rehash gone out of credits so it was interrupted. * Schedule the work as soon as possible to continue. 
@@ -723,6 +725,17 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); } +static void +mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + /* The entry markers are relative to the current chunk and therefore + * needs to be reset together with the chunk marker. + */ + ctx->current_vchunk = NULL; + ctx->start_ventry = NULL; + ctx->stop_ventry = NULL; +} + static void mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk) { @@ -745,7 +758,7 @@ mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *v * the current chunk pointer to make sure all chunks * are properly migrated. */ - vregion->rehash.ctx.current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(&vregion->rehash.ctx); } static struct mlxsw_sp_acl_tcam_vregion * @@ -818,10 +831,14 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam = vregion->tcam; if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx; + mutex_lock(&tcam->lock); list_del(&vregion->tlist); mutex_unlock(&tcam->lock); - cancel_delayed_work_sync(&vregion->rehash.dw); + if (cancel_delayed_work_sync(&vregion->rehash.dw) && + ctx->hints_priv) + ops->region_rehash_hints_put(ctx->hints_priv); } mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion); if (vregion->region2) @@ -1187,8 +1204,14 @@ mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_ventry *ventry, bool *activity) { - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, - ventry->entry, activity); + struct mlxsw_sp_acl_tcam_vregion *vregion = ventry->vchunk->vregion; + int err; + + mutex_lock(&vregion->lock); + err = mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, ventry->entry, + activity); + mutex_unlock(&vregion->lock); + return err; } 
static int @@ -1222,6 +1245,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_chunk *new_chunk; + WARN_ON(vchunk->chunk2); + new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); if (IS_ERR(new_chunk)) return PTR_ERR(new_chunk); @@ -1240,7 +1265,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); vchunk->chunk2 = NULL; - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); } static int @@ -1263,6 +1288,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, return 0; } + if (list_empty(&vchunk->ventry_list)) + goto out; + /* If the migration got interrupted, we have the ventry to start from * stored in context. */ @@ -1272,6 +1300,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, ventry = list_first_entry(&vchunk->ventry_list, typeof(*ventry), list); + WARN_ON(ventry->vchunk != vchunk); + list_for_each_entry_from(ventry, &vchunk->ventry_list, list) { /* During rollback, once we reach the ventry that failed * to migrate, we are done. @@ -1312,6 +1342,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, } } +out: mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx); return 0; } @@ -1325,6 +1356,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; + if (list_empty(&vregion->vchunk_list)) + return 0; + /* If the migration got interrupted, we have the vchunk * we are working on stored in context. 
*/ @@ -1353,16 +1387,17 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, int err, err2; trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); - mutex_lock(&vregion->lock); err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { + if (ctx->this_is_rollback) + return err; /* In case migration was not successful, we need to swap * so the original region pointer is assigned again * to vregion->region. */ swap(vregion->region, vregion->region2); - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); ctx->this_is_rollback = true; err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); @@ -1373,7 +1408,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, /* Let the rollback to be continued later on. */ } } - mutex_unlock(&vregion->lock); trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion); return err; } @@ -1422,6 +1456,7 @@ mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp, ctx->hints_priv = hints_priv; ctx->this_is_rollback = false; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); return 0; @@ -1474,7 +1509,8 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, ctx, credits); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + return; } if (*credits >= 0) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 189a6a0a2e..8561a7bf53 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -730,7 +730,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5, bool sgmii = false, inband_aneg = false; int err; - if (port->conf.inband) { + if (conf->inband) { if (conf->portmode == PHY_INTERFACE_MODE_SGMII || conf->portmode 
== PHY_INTERFACE_MODE_QSGMII) inband_aneg = true; /* Cisco-SGMII in-band-aneg */ @@ -947,7 +947,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5, if (err) return -EINVAL; - if (port->conf.inband) { + if (conf->inband) { /* Enable/disable 1G counters in ASM */ spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev), ASM_PORT_CFG_CSC_STAT_DIS, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 6318186680..1f84ba638e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3232,9 +3232,12 @@ static int ionic_lif_adminq_init(struct ionic_lif *lif) napi_enable(&qcq->napi); - if (qcq->flags & IONIC_QCQ_F_INTR) + if (qcq->flags & IONIC_QCQ_F_INTR) { + irq_set_affinity_hint(qcq->intr.vector, + &qcq->intr.affinity_mask); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_CLEAR); + } qcq->flags |= IONIC_QCQ_F_INITED; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6e902d57c7..99a6d11fec 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1238,7 +1238,6 @@ static void qed_slowpath_task(struct work_struct *work) static int qed_slowpath_wq_start(struct qed_dev *cdev) { struct qed_hwfn *hwfn; - char name[NAME_SIZE]; int i; if (IS_VF(cdev)) @@ -1247,11 +1246,11 @@ static int qed_slowpath_wq_start(struct qed_dev *cdev) for_each_hwfn(cdev, i) { hwfn = &cdev->hwfns[i]; - snprintf(name, NAME_SIZE, "slowpath-%02x:%02x.%02x", - cdev->pdev->bus->number, - PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id); + hwfn->slowpath_wq = alloc_workqueue("slowpath-%02x:%02x.%02x", + 0, 0, cdev->pdev->bus->number, + PCI_SLOT(cdev->pdev->devfn), + hwfn->abs_pf_id); - hwfn->slowpath_wq = alloc_workqueue(name, 0, 0); if (!hwfn->slowpath_wq) { DP_NOTICE(hwfn, "Cannot create slowpath workqueue\n"); return -ENOMEM; diff --git 
a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 3010833ddd..8871099b99 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1868,8 +1868,8 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, struct flow_cls_offload *f) { struct qede_arfs_fltr_node *n; - int min_hlen, rc = -EINVAL; struct qede_arfs_tuple t; + int min_hlen, rc; __qede_lock(edev); @@ -1879,7 +1879,8 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - if (qede_parse_flow_attr(edev, proto, f->rule, &t)) + rc = qede_parse_flow_attr(edev, proto, f->rule, &t); + if (rc) goto unlock; /* Validate profile mode and number of filters */ @@ -1888,11 +1889,13 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, DP_NOTICE(edev, "Filter configuration invalidated, filter mode=0x%x, configured mode=0x%x, filter count=0x%x\n", t.mode, edev->arfs->mode, edev->arfs->filter_count); + rc = -EINVAL; goto unlock; } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) + rc = qede_parse_actions(edev, &f->rule->action, f->common.extack); + if (rc) goto unlock; if (qede_flow_find_fltr(edev, &t)) { @@ -1998,10 +2001,9 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, if (IS_ERR(flow)) return PTR_ERR(flow); - if (qede_parse_flow_attr(edev, proto, flow->rule, t)) { - err = -EINVAL; + err = qede_parse_flow_attr(edev, proto, flow->rule, t); + if (err) goto err_out; - } /* Make sure location is valid and filter isn't already set */ err = qede_flow_spec_validate(edev, &flow->rule->action, t, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index b8ac07be61..76d820c4e6 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4273,11 +4273,11 @@ static void 
rtl8169_doorbell(struct rtl8169_private *tp) static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) { - unsigned int frags = skb_shinfo(skb)->nr_frags; struct rtl8169_private *tp = netdev_priv(dev); unsigned int entry = tp->cur_tx % NUM_TX_DESC; struct TxDesc *txd_first, *txd_last; bool stop_queue, door_bell; + unsigned int frags; u32 opts[2]; if (unlikely(!rtl_tx_slots_avail(tp))) { @@ -4300,6 +4300,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd_first = tp->TxDescArray + entry; + frags = skb_shinfo(skb)->nr_frags; if (frags) { if (rtl8169_xmit_frags(tp, skb, opts, entry)) goto err_dma_1; @@ -4617,10 +4618,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } - if (napi_schedule_prep(&tp->napi)) { - rtl_irq_disable(tp); - __napi_schedule(&tp->napi); - } + rtl_irq_disable(tp); + napi_schedule(&tp->napi); out: rtl_ack_events(tp, status); @@ -5138,6 +5137,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp) struct mii_bus *new_bus; int ret; + /* On some boards with this chip version the BIOS is buggy and misses + * to reset the PHY page selector. This results in the PHY ID read + * accessing registers on a different page, returning a more or + * less random value. Fix this by resetting the page selector first. 
+ */ + if (tp->mac_version == RTL_GIGA_MAC_VER_25 || + tp->mac_version == RTL_GIGA_MAC_VER_26) + r8169_mdio_write(tp, 0x1f, 0); + new_bus = devm_mdiobus_alloc(&pdev->dev); if (!new_bus) return -ENOMEM; @@ -5168,7 +5176,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp) return -EUNATCH; } - tp->phydev->mac_managed_pm = 1; + tp->phydev->mac_managed_pm = true; phy_support_asym_pause(tp->phydev); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 2bf5d4c208..eee446e500 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -969,12 +969,12 @@ static int ravb_poll(struct napi_struct *napi, int budget) int q = napi - priv->napi; int mask = BIT(q); int quota = budget; + bool unmask; /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (ravb_rx(ndev, "a, q)) - goto out; + unmask = !ravb_rx(ndev, "a, q); /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); @@ -984,6 +984,9 @@ static int ravb_poll(struct napi_struct *napi, int budget) netif_wake_subqueue(ndev, q); spin_unlock_irqrestore(&priv->lock, flags); + if (!unmask) + goto out; + napi_complete(napi); /* Re-enable RX/TX interrupts */ @@ -2067,6 +2070,7 @@ static const struct of_device_id ravb_match_table[] = { { .compatible = "renesas,etheravb-rcar-gen2", .data = &ravb_gen2_hw_info }, { .compatible = "renesas,etheravb-r8a7795", .data = &ravb_gen3_hw_info }, { .compatible = "renesas,etheravb-rcar-gen3", .data = &ravb_gen3_hw_info }, + { .compatible = "renesas,etheravb-rcar-gen4", .data = &ravb_gen3_hw_info }, { } }; MODULE_DEVICE_TABLE(of, ravb_match_table); diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 387539a809..95e9204ce8 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -175,8 +175,8 @@ static inline void mcf_outsw(void *a, unsigned char 
*p, int l) writew(*wp++, a); } -#define SMC_inw(a, r) _swapw(readw((a) + (r))) -#define SMC_outw(lp, v, a, r) writew(_swapw(v), (a) + (r)) +#define SMC_inw(a, r) ioread16be((a) + (r)) +#define SMC_outw(lp, v, a, r) iowrite16be(v, (a) + (r)) #define SMC_insw(a, r, p, l) mcf_insw(a + r, p, l) #define SMC_outsw(a, r, p, l) mcf_outsw(a + r, p, l) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f6d6a6d9c5..026e3645e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -88,19 +88,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 base_register; - u32 value; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; - if (queue >= 4) - queue -= 4; + ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3); - value = readl(ioaddr + base_register); + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) & + GMAC_RXQCTRL_PSRQX_MASK(i)); - value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue); - value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & GMAC_RXQCTRL_PSRQX_MASK(queue); - writel(value, ioaddr + base_register); + + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + } else { + queue -= 4; + + ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + GMAC_RXQCTRL_PSRQX_MASK(queue); + + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + } } static void dwmac4_tx_queue_priority(struct 
mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index ec1616ffbf..dd73f38ec0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -97,17 +97,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 value, reg; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; - if (queue >= 4) + ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3); + + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) & + XGMAC_PSRQ(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); + + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + } else { queue -= 4; - value = readl(ioaddr + reg); - value &= ~XGMAC_PSRQ(queue); - value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue); + ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); - writel(value, ioaddr + reg); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + } } static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio, diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index d72018a60c..e14c1ac767 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -949,17 +949,6 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void gem_poll_controller(struct net_device *dev) -{ - struct gem *gp = 
netdev_priv(dev); - - disable_irq(gp->pdev->irq); - gem_interrupt(gp->pdev->irq, dev); - enable_irq(gp->pdev->irq); -} -#endif - static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gem *gp = netdev_priv(dev); @@ -2836,9 +2825,6 @@ static const struct net_device_ops gem_netdev_ops = { .ndo_change_mtu = gem_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = gem_set_mac_address, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = gem_poll_controller, -#endif }; static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f94d6d322d..4bd57b79a0 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2535,6 +2535,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { + struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns; + struct am65_cpsw_tx_chn *tx_chan = common->tx_chns; struct device *dev = common->dev; struct devlink_port *dl_port; struct am65_cpsw_port *port; @@ -2553,6 +2555,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } + /* The DMA Channels are not guaranteed to be in a clean state. + * Reset and disable them to ensure that they are back to the + * clean state and ready to be used. 
+ */ + for (i = 0; i < common->tx_ch_num; i++) { + k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i], + am65_cpsw_nuss_tx_cleanup); + k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn); + } + + for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) + k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, + am65_cpsw_nuss_rx_cleanup, !!i); + + k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); + ret = am65_cpsw_nuss_register_devlink(common); if (ret) return ret; diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index c30a6e510a..1552dfee4e 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -649,6 +649,11 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; + if ((ptp_classify_raw(skb) & PTP_CLASS_V1) && + ((mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK) == + (skb_cb->skb_mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK))) + mtype_seqid = skb_cb->skb_mtype_seqid; + if (mtype_seqid == skb_cb->skb_mtype_seqid) { u64 ns = event->timestamp; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index b4db50c9e7..4e45153959 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1433,7 +1433,7 @@ static int temac_probe(struct platform_device *pdev) } /* map device registers */ - lp->regs = devm_platform_ioremap_resource_byname(pdev, 0); + lp->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map TEMAC registers\n"); return -ENOMEM; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 9569b5cc59..0e4ea3c0fe 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -909,7 +909,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return 
-EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -1006,7 +1006,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index ce93316f5f..3271428e64 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -713,11 +713,12 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); + struct hlist_node *next; struct pdp_ctx *pctx; int i; for (i = 0; i < gtp->hash_size; i++) - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) + hlist_for_each_entry_safe(pctx, next, >p->tid_hash[i], hlist_tid) pdp_context_delete(pctx); list_del_rcu(>p->list); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 5aa9217240..a18b49db38 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -440,7 +440,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - err = ip_local_out(net, skb->sk, skb); + err = ip_local_out(net, NULL, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else @@ -495,7 +495,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - err = ip6_local_out(dev_net(dev), skb->sk, skb); + err = ip6_local_out(dev_net(dev), NULL, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 556ca98843..ab382496c3 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -242,7 +242,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs, dev = MDIO_MMD_VEND2; break; 
default: - return -1; + return -EINVAL; } ret = xpcs_write(xpcs, dev, MDIO_CTRL1, MDIO_CTRL1_RESET); @@ -808,7 +808,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, return ret; break; default: - return -1; + return -EINVAL; } if (compat->pma_config) { diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index dc209ad8a0..59d05a1672 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1669,6 +1669,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, /* PHY_BASIC_FEATURES */ .config_init = ksz8061_config_init, + .soft_reset = genphy_soft_reset, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, .suspend = genphy_suspend, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 42bf0a3ec6..f0e34b2b07 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2096,14 +2096,16 @@ static ssize_t tun_put_user(struct tun_struct *tun, tun_is_little_endian(tun), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb); - pr_err("unexpected GSO type: " - "0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + + if (net_ratelimit()) { + netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), + tun16_to_cpu(tun, gso.hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + } WARN_ON_ONCE(1); return -EINVAL; } diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 6db37eb6c5..4b48a5c09b 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1141,17 +1141,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) continue; } - /* Clone SKB */ - new_skb = 
skb_clone(skb, GFP_ATOMIC); + new_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len); if (!new_skb) goto err; - new_skb->len = pkt_len; + skb_put(new_skb, pkt_len); + memcpy(new_skb->data, skb->data, pkt_len); skb_pull(new_skb, AQ_RX_HW_PAD); - skb_set_tail_pointer(new_skb, new_skb->len); - new_skb->truesize = SKB_TRUESIZE(new_skb->len); if (aqc111_data->rx_checksum) aqc111_rx_checksum(new_skb, pkt_desc); diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index c126df1c13..9da88e132d 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -157,6 +157,8 @@ #define AX_EEPROM_MAGIC 0xdeadbeef #define AX_EEPROM_LEN 0x200 +#define AX_EMBD_PHY_ADDR 0x10 + /* This structure cannot exceed sizeof(unsigned long [5]) AKA 20 bytes */ struct asix_data { u8 multi_filter[AX_MCAST_FILTER_SIZE]; @@ -181,6 +183,7 @@ struct asix_common_private { struct asix_rx_fixup_info rx_fixup_info; struct mii_bus *mdio; struct phy_device *phydev; + struct phy_device *phydev_int; u16 phy_addr; char phy_name[20]; bool embd_phy; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 396505396a..5a4137b253 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -694,10 +694,26 @@ static int ax88772_init_phy(struct usbnet *dev) } phy_suspend(priv->phydev); - priv->phydev->mac_managed_pm = 1; + priv->phydev->mac_managed_pm = true; phy_attached_info(priv->phydev); + if (priv->embd_phy) + return 0; + + /* In case main PHY is not the embedded PHY and MAC is RMII clock + * provider, we need to suspend embedded PHY by keeping PLL enabled + * (AX_SWRESET_IPPD == 0). 
+ */ + priv->phydev_int = mdiobus_get_phy(priv->mdio, AX_EMBD_PHY_ADDR); + if (!priv->phydev_int) { + netdev_err(dev->net, "Could not find internal PHY\n"); + return -ENODEV; + } + + priv->phydev_int->mac_managed_pm = true; + phy_suspend(priv->phydev_int); + return 0; } @@ -753,7 +769,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) return ret; priv->phy_addr = ret; - priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10); + priv->embd_phy = ((priv->phy_addr & 0x1f) == AX_EMBD_PHY_ADDR); ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); if (ret < 0) { diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index f11748cc5b..868fad2f8c 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1478,21 +1478,16 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Skip IP alignment pseudo header */ skb_pull(skb, 2); - skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); ax88179_rx_checksum(skb, pkt_hdr); return 1; } - ax_skb = skb_clone(skb, GFP_ATOMIC); + ax_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len); if (!ax_skb) return 0; - skb_trim(ax_skb, pkt_len); + skb_put(ax_skb, pkt_len); + memcpy(ax_skb->data, skb->data + 2, pkt_len); - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - - skb->truesize = pkt_len_plus_padd + - SKB_DATA_ALIGN(sizeof(struct sk_buff)); ax88179_rx_checksum(ax_skb, pkt_hdr); usbnet_skb_return(dev, ax_skb); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 846ace9830..9bd145732e 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1362,6 +1362,9 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, 
/* Telit FN920C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a9, 0)}, /* Telit FN920C04 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ @@ -1419,6 +1422,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ + {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 5f962f58ff..8a38939dd5 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -842,7 +842,7 @@ static int smsc95xx_start_rx_path(struct usbnet *dev, int in_pm) static int smsc95xx_reset(struct usbnet *dev) { struct smsc95xx_priv *pdata = dev->driver_priv; - u32 read_buf, write_buf, burst_cap; + u32 read_buf, burst_cap; int ret = 0, timeout; netif_dbg(dev, ifup, dev->net, "entering smsc95xx_reset\n"); @@ -984,10 +984,13 @@ static int smsc95xx_reset(struct usbnet *dev) return ret; netif_dbg(dev, ifup, dev->net, "ID_REV = 0x%08x\n", read_buf); + ret = smsc95xx_read_reg(dev, LED_GPIO_CFG, &read_buf); + if (ret < 0) + return ret; /* Configure GPIO pins as LED outputs */ - write_buf = LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED | - LED_GPIO_CFG_FDX_LED; - ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, write_buf); + read_buf |= LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED | + LED_GPIO_CFG_FDX_LED; + ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, read_buf); if (ret < 0) return ret; @@ -1785,9 +1788,11 @@ static int smsc95xx_reset_resume(struct usb_interface *intf) static void smsc95xx_rx_csum_offload(struct sk_buff *skb) { - skb->csum = *(u16 *)(skb_tail_pointer(skb) - 2); + 
u16 *csum_ptr = (u16 *)(skb_tail_pointer(skb) - 2); + + skb->csum = (__force __wsum)get_unaligned(csum_ptr); skb->ip_summed = CHECKSUM_COMPLETE; - skb_trim(skb, skb->len - 2); + skb_trim(skb, skb->len - 2); /* remove csum */ } static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) @@ -1845,25 +1850,22 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (dev->net->features & NETIF_F_RXCSUM) smsc95xx_rx_csum_offload(skb); skb_trim(skb, skb->len - 4); /* remove fcs */ - skb->truesize = size + sizeof(struct sk_buff); return 1; } - ax_skb = skb_clone(skb, GFP_ATOMIC); + ax_skb = netdev_alloc_skb_ip_align(dev->net, size); if (unlikely(!ax_skb)) { netdev_warn(dev->net, "Error allocating skb\n"); return 0; } - ax_skb->len = size; - ax_skb->data = packet; - skb_set_tail_pointer(ax_skb, size); + skb_put(ax_skb, size); + memcpy(ax_skb->data, packet, size); if (dev->net->features & NETIF_F_RXCSUM) smsc95xx_rx_csum_offload(ax_skb); skb_trim(ax_skb, ax_skb->len - 4); /* remove fcs */ - ax_skb->truesize = size + sizeof(struct sk_buff); usbnet_skb_return(dev, ax_skb); } diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 279a540aef..1c4a4bd46b 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -419,19 +419,15 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) skb_pull(skb, 3); skb->len = len; skb_set_tail_pointer(skb, len); - skb->truesize = len + sizeof(struct sk_buff); return 2; } - /* skb_clone is used for address align */ - sr_skb = skb_clone(skb, GFP_ATOMIC); + sr_skb = netdev_alloc_skb_ip_align(dev->net, len); if (!sr_skb) return 0; - sr_skb->len = len; - sr_skb->data = skb->data + 3; - skb_set_tail_pointer(sr_skb, len); - sr_skb->truesize = len + sizeof(struct sk_buff); + skb_put(sr_skb, len); + memcpy(sr_skb->data, skb->data + 3, len); usbnet_skb_return(dev, sr_skb); skb_pull(skb, len + SR_RX_OVERHEAD); diff --git a/drivers/net/wireguard/netlink.c 
b/drivers/net/wireguard/netlink.c index f5bc279c9a..9dc02fa51e 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -164,8 +164,8 @@ get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) if (!allowedips_node) goto no_allowedips; if (!ctx->allowedips_seq) - ctx->allowedips_seq = peer->device->peer_allowedips.seq; - else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + ctx->allowedips_seq = ctx->wg->peer_allowedips.seq; + else if (ctx->allowedips_seq != ctx->wg->peer_allowedips.seq) goto no_allowedips; allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); @@ -255,17 +255,17 @@ static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!peers_nest) goto out; ret = 0; - /* If the last cursor was removed via list_del_init in peer_remove, then + lockdep_assert_held(&wg->device_update_lock); + /* If the last cursor was removed in peer_remove or peer_remove_all, then * we just treat this the same as there being no more peers left. The * reason is that seq_nr should indicate to userspace that this isn't a * coherent dump anyway, so they'll try again. 
*/ if (list_empty(&wg->peer_list) || - (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + (ctx->next_peer && ctx->next_peer->is_dead)) { nla_nest_cancel(skb, peers_nest); goto out; } - lockdep_assert_held(&wg->device_update_lock); peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { if (get_peer(peer, skb, ctx)) { diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index efe38b2c1d..71c2bf8817 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1590,6 +1590,20 @@ static int ar5523_probe(struct usb_interface *intf, struct ar5523 *ar; int error = -ENOMEM; + static const u8 bulk_ep_addr[] = { + AR5523_CMD_TX_PIPE | USB_DIR_OUT, + AR5523_DATA_TX_PIPE | USB_DIR_OUT, + AR5523_CMD_RX_PIPE | USB_DIR_IN, + AR5523_DATA_RX_PIPE | USB_DIR_IN, + 0}; + + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr)) { + dev_err(&dev->dev, + "Could not find all expected endpoints\n"); + error = -ENODEV; + goto out; + } + /* * Load firmware if the device requires it. 
This will return * -ENXIO on success and we'll get called back afer the usb diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index eca24a6116..4a93c415db 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -640,6 +640,9 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .max_spatial_stream = 4, .fw = { .dir = WCN3990_HW_1_0_FW_DIR, + .board = WCN3990_HW_1_0_BOARD_DATA_FILE, + .board_size = WCN3990_BOARD_DATA_SZ, + .board_ext_size = WCN3990_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .hw_ops = &wcn3990_ops, diff --git a/drivers/net/wireless/ath/ath10k/debugfs_sta.c b/drivers/net/wireless/ath/ath10k/debugfs_sta.c index 367539f2c3..f7912c72cb 100644 --- a/drivers/net/wireless/ath/ath10k/debugfs_sta.c +++ b/drivers/net/wireless/ath/ath10k/debugfs_sta.c @@ -438,7 +438,7 @@ ath10k_dbg_sta_write_peer_debug_trigger(struct file *file, } out: mutex_unlock(&ar->conf_mutex); - return count; + return ret ?: count; } static const struct file_operations fops_peer_debug_trigger = { diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 591ef7416b..0d8c8e948b 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -132,6 +132,7 @@ enum qca9377_chip_id_rev { /* WCN3990 1.0 definitions */ #define WCN3990_HW_1_0_DEV_VERSION ATH10K_HW_WCN3990 #define WCN3990_HW_1_0_FW_DIR ATH10K_FW_DIR "/WCN3990/hw1.0" +#define WCN3990_HW_1_0_BOARD_DATA_FILE "board.bin" #define ATH10K_FW_FILE_BASE "firmware" #define ATH10K_FW_API_MAX 6 diff --git a/drivers/net/wireless/ath/ath10k/targaddrs.h b/drivers/net/wireless/ath/ath10k/targaddrs.h index ec556bb88d..ba37e6c7ce 100644 --- a/drivers/net/wireless/ath/ath10k/targaddrs.h +++ b/drivers/net/wireless/ath/ath10k/targaddrs.h @@ -491,4 +491,7 @@ struct host_interest { #define QCA4019_BOARD_DATA_SZ 12064 #define QCA4019_BOARD_EXT_DATA_SZ 0 +#define 
WCN3990_BOARD_DATA_SZ 26328 +#define WCN3990_BOARD_EXT_DATA_SZ 0 + #endif /* __TARGADDRS_H__ */ diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 7c1c2658cb..c8ccea542f 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1762,12 +1762,32 @@ void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch, int ath10k_wmi_wait_for_service_ready(struct ath10k *ar) { - unsigned long time_left; + unsigned long time_left, i; time_left = wait_for_completion_timeout(&ar->wmi.service_ready, WMI_SERVICE_READY_TIMEOUT_HZ); - if (!time_left) - return -ETIMEDOUT; + if (!time_left) { + /* Sometimes the PCI HIF doesn't receive interrupt + * for the service ready message even if the buffer + * was completed. PCIe sniffer shows that it's + * because the corresponding CE ring doesn't fires + * it. Workaround here by polling CE rings once. + */ + ath10k_warn(ar, "failed to receive service ready completion, polling..\n"); + + for (i = 0; i < CE_COUNT; i++) + ath10k_hif_send_complete_check(ar, i, 1); + + time_left = wait_for_completion_timeout(&ar->wmi.service_ready, + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + ath10k_warn(ar, "polling timed out\n"); + return -ETIMEDOUT; + } + + ath10k_warn(ar, "service ready completion received, continuing normally\n"); + } + return 0; } diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index f2149241fb..265b85c40a 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -97,7 +97,7 @@ static struct mhi_controller_config ath11k_mhi_config_qca6390 = { .max_channels = 128, .timeout_ms = 2000, .use_bounce_buf = false, - .buf_len = 0, + .buf_len = 8192, .num_channels = ARRAY_SIZE(ath11k_mhi_channels_qca6390), .ch_cfg = ath11k_mhi_channels_qca6390, .num_events = ARRAY_SIZE(ath11k_mhi_events_qca6390), diff --git a/drivers/net/wireless/ath/ath9k/antenna.c 
b/drivers/net/wireless/ath/ath9k/antenna.c index 988222cea9..acc84e6711 100644 --- a/drivers/net/wireless/ath/ath9k/antenna.c +++ b/drivers/net/wireless/ath/ath9k/antenna.c @@ -643,7 +643,7 @@ static void ath_ant_try_scan(struct ath_ant_comb *antcomb, conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1; conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_PLUS_LNA2; } else if (antcomb->rssi_sub > - antcomb->rssi_lna1) { + antcomb->rssi_lna2) { /* set to A-B */ conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1; conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_MINUS_LNA2; diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index e4eb666c6e..a5265997b5 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -1069,6 +1069,38 @@ static int carl9170_usb_probe(struct usb_interface *intf, ar->usb_ep_cmd_is_bulk = true; } + /* Verify that all expected endpoints are present */ + if (ar->usb_ep_cmd_is_bulk) { + u8 bulk_ep_addr[] = { + AR9170_USB_EP_RX | USB_DIR_IN, + AR9170_USB_EP_TX | USB_DIR_OUT, + AR9170_USB_EP_CMD | USB_DIR_OUT, + 0}; + u8 int_ep_addr[] = { + AR9170_USB_EP_IRQ | USB_DIR_IN, + 0}; + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) + err = -ENODEV; + } else { + u8 bulk_ep_addr[] = { + AR9170_USB_EP_RX | USB_DIR_IN, + AR9170_USB_EP_TX | USB_DIR_OUT, + 0}; + u8 int_ep_addr[] = { + AR9170_USB_EP_IRQ | USB_DIR_IN, + AR9170_USB_EP_CMD | USB_DIR_OUT, + 0}; + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) + err = -ENODEV; + } + + if (err) { + carl9170_free(ar); + return err; + } + usb_set_intfdata(intf, ar); SET_IEEE80211_DEV(ar->hw, &intf->dev); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index b14c54da56..1c95e8f759 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -790,8 +790,7 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg, scan_request = cfg->scan_request; cfg->scan_request = NULL; - if (timer_pending(&cfg->escan_timeout)) - del_timer_sync(&cfg->escan_timeout); + timer_delete_sync(&cfg->escan_timeout); if (fw_abort) { /* Do a scan abort to stop the driver's scan engine */ @@ -7781,6 +7780,7 @@ void brcmf_cfg80211_detach(struct brcmf_cfg80211_info *cfg) brcmf_btcoex_detach(cfg); wiphy_unregister(cfg->wiphy); wl_deinit_priv(cfg); + cancel_work_sync(&cfg->escan_timeout_work); brcmf_free_wiphy(cfg->wiphy); kfree(cfg); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index bb5fff8174..2dcf5a827b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -48,6 +48,8 @@ int iwl_mvm_ftm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (!pasn) return -ENOBUFS; + iwl_mvm_ftm_remove_pasn_sta(mvm, addr); + pasn->cipher = iwl_mvm_cipher_to_location_cipher(cipher); switch (pasn->cipher) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c index 44344216a1..4deb242df7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c @@ -104,15 +104,17 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) if (ret) return ERR_PTR(ret); - if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) + if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != + resp_size)) { + iwl_free_resp(&cmd); return ERR_PTR(-EIO); + } + + resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); + iwl_free_resp(&cmd); - resp = kzalloc(resp_size, GFP_KERNEL); if (!resp) return ERR_PTR(-ENOMEM); - memcpy(resp, cmd.resp_pkt->data, resp_size); - - iwl_free_resp(&cmd); return 
resp; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index e4fd58f043..c0ffa26bc5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2571,7 +2571,8 @@ static int iwl_mvm_build_scan_cmd(struct iwl_mvm *mvm, if (ver_handler->version != scan_ver) continue; - return ver_handler->handler(mvm, vif, params, type, uid); + err = ver_handler->handler(mvm, vif, params, type, uid); + return err ? : uid; } err = iwl_mvm_scan_umac(mvm, vif, params, type, uid); diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 529e325498..ad9678186c 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -2718,7 +2718,7 @@ __mwl8k_cmd_mac_multicast_adr(struct ieee80211_hw *hw, int allmulti, cmd->action |= cpu_to_le16(MWL8K_ENABLE_RX_MULTICAST); cmd->numaddr = cpu_to_le16(mc_count); netdev_hw_addr_list_for_each(ha, mc_list) { - memcpy(cmd->addr[i], ha->addr, ETH_ALEN); + memcpy(cmd->addr[i++], ha->addr, ETH_ALEN); } } diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 3536b9f847..b042dff4ac 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "rtl8xxxu.h" #include "rtl8xxxu_regs.h" @@ -1389,13 +1390,13 @@ rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv, int channel, bool ht40) u8 cck[RTL8723A_MAX_RF_PATHS], ofdm[RTL8723A_MAX_RF_PATHS]; u8 ofdmbase[RTL8723A_MAX_RF_PATHS], mcsbase[RTL8723A_MAX_RF_PATHS]; u32 val32, ofdm_a, ofdm_b, mcs_a, mcs_b; - u8 val8; + u8 val8, base; int group, i; group = rtl8xxxu_gen1_channel_to_group(channel); - cck[0] = priv->cck_tx_power_index_A[group] - 1; - cck[1] = priv->cck_tx_power_index_B[group] - 1; + cck[0] = 
priv->cck_tx_power_index_A[group]; + cck[1] = priv->cck_tx_power_index_B[group]; if (priv->hi_pa) { if (cck[0] > 0x20) @@ -1406,10 +1407,6 @@ rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv, int channel, bool ht40) ofdm[0] = priv->ht40_1s_tx_power_index_A[group]; ofdm[1] = priv->ht40_1s_tx_power_index_B[group]; - if (ofdm[0]) - ofdm[0] -= 1; - if (ofdm[1]) - ofdm[1] -= 1; ofdmbase[0] = ofdm[0] + priv->ofdm_tx_power_index_diff[group].a; ofdmbase[1] = ofdm[1] + priv->ofdm_tx_power_index_diff[group].b; @@ -1498,20 +1495,19 @@ rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv, int channel, bool ht40) rtl8xxxu_write32(priv, REG_TX_AGC_A_MCS15_MCS12, mcs_a + power_base->reg_0e1c); + val8 = u32_get_bits(mcs_a + power_base->reg_0e1c, 0xff000000); for (i = 0; i < 3; i++) { - if (i != 2) - val8 = (mcsbase[0] > 8) ? (mcsbase[0] - 8) : 0; - else - val8 = (mcsbase[0] > 6) ? (mcsbase[0] - 6) : 0; + base = i != 2 ? 8 : 6; + val8 = max_t(int, val8 - base, 0); rtl8xxxu_write8(priv, REG_OFDM0_XC_TX_IQ_IMBALANCE + i, val8); } + rtl8xxxu_write32(priv, REG_TX_AGC_B_MCS15_MCS12, mcs_b + power_base->reg_0868); + val8 = u32_get_bits(mcs_b + power_base->reg_0868, 0xff000000); for (i = 0; i < 3; i++) { - if (i != 2) - val8 = (mcsbase[1] > 8) ? (mcsbase[1] - 8) : 0; - else - val8 = (mcsbase[1] > 6) ? (mcsbase[1] - 6) : 0; + base = i != 2 ? 
8 : 6; + val8 = max_t(int, val8 - base, 0); rtl8xxxu_write8(priv, REG_OFDM0_XD_TX_IQ_IMBALANCE + i, val8); } } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c index c02813fba9..0358e56d01 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c @@ -35,7 +35,7 @@ static long _rtl92de_translate_todbm(struct ieee80211_hw *hw, static void _rtl92de_query_rxphystatus(struct ieee80211_hw *hw, struct rtl_stats *pstats, - struct rx_desc_92d *pdesc, + __le32 *pdesc, struct rx_fwinfo_92d *p_drvinfo, bool packet_match_bssid, bool packet_toself, @@ -49,8 +49,10 @@ static void _rtl92de_query_rxphystatus(struct ieee80211_hw *hw, u8 i, max_spatial_stream; u32 rssi, total_rssi = 0; bool is_cck_rate; + u8 rxmcs; - is_cck_rate = RX_HAL_IS_CCK_RATE(pdesc->rxmcs); + rxmcs = get_rx_desc_rxmcs(pdesc); + is_cck_rate = rxmcs <= DESC_RATE11M; pstats->packet_matchbssid = packet_match_bssid; pstats->packet_toself = packet_toself; pstats->packet_beacon = packet_beacon; @@ -158,8 +160,8 @@ static void _rtl92de_query_rxphystatus(struct ieee80211_hw *hw, pstats->rx_pwdb_all = pwdb_all; pstats->rxpower = rx_pwr_all; pstats->recvsignalpower = rx_pwr_all; - if (pdesc->rxht && pdesc->rxmcs >= DESC_RATEMCS8 && - pdesc->rxmcs <= DESC_RATEMCS15) + if (get_rx_desc_rxht(pdesc) && rxmcs >= DESC_RATEMCS8 && + rxmcs <= DESC_RATEMCS15) max_spatial_stream = 2; else max_spatial_stream = 1; @@ -365,7 +367,7 @@ static void _rtl92de_process_phyinfo(struct ieee80211_hw *hw, static void _rtl92de_translate_rx_signal_stuff(struct ieee80211_hw *hw, struct sk_buff *skb, struct rtl_stats *pstats, - struct rx_desc_92d *pdesc, + __le32 *pdesc, struct rx_fwinfo_92d *p_drvinfo) { struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -414,7 +416,8 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, stats->icv = (u16)get_rx_desc_icv(pdesc); stats->crc = 
(u16)get_rx_desc_crc32(pdesc); stats->hwerror = (stats->crc | stats->icv); - stats->decrypted = !get_rx_desc_swdec(pdesc); + stats->decrypted = !get_rx_desc_swdec(pdesc) && + get_rx_desc_enc_type(pdesc) != RX_DESC_ENC_NONE; stats->rate = (u8)get_rx_desc_rxmcs(pdesc); stats->shortpreamble = (u16)get_rx_desc_splcp(pdesc); stats->isampdu = (bool)(get_rx_desc_paggr(pdesc) == 1); @@ -427,8 +430,6 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, rx_status->band = hw->conf.chandef.chan->band; if (get_rx_desc_crc32(pdesc)) rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; - if (!get_rx_desc_swdec(pdesc)) - rx_status->flag |= RX_FLAG_DECRYPTED; if (get_rx_desc_bw(pdesc)) rx_status->bw = RATE_INFO_BW_40; if (get_rx_desc_rxht(pdesc)) @@ -442,9 +443,7 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, if (phystatus) { p_drvinfo = (struct rx_fwinfo_92d *)(skb->data + stats->rx_bufshift); - _rtl92de_translate_rx_signal_stuff(hw, - skb, stats, - (struct rx_desc_92d *)pdesc, + _rtl92de_translate_rx_signal_stuff(hw, skb, stats, pdesc, p_drvinfo); } /*rx_status->qual = stats->signal; */ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h index d01578875c..eb3f768140 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h @@ -14,6 +14,15 @@ #define USB_HWDESC_HEADER_LEN 32 #define CRCLENGTH 4 +enum rtl92d_rx_desc_enc { + RX_DESC_ENC_NONE = 0, + RX_DESC_ENC_WEP40 = 1, + RX_DESC_ENC_TKIP_WO_MIC = 2, + RX_DESC_ENC_TKIP_MIC = 3, + RX_DESC_ENC_AES = 4, + RX_DESC_ENC_WEP104 = 5, +}; + /* macros to read/write various fields in RX or TX descriptors */ static inline void set_tx_desc_pkt_size(__le32 *__pdesc, u32 __val) @@ -246,6 +255,11 @@ static inline u32 get_rx_desc_drv_info_size(__le32 *__pdesc) return le32_get_bits(*__pdesc, GENMASK(19, 16)); } +static inline u32 get_rx_desc_enc_type(__le32 *__pdesc) 
+{ + return le32_get_bits(*__pdesc, GENMASK(22, 20)); +} + static inline u32 get_rx_desc_shift(__le32 *__pdesc) { return le32_get_bits(*__pdesc, GENMASK(25, 24)); @@ -380,10 +394,17 @@ struct rx_fwinfo_92d { u8 csi_target[2]; u8 sigevm; u8 max_ex_pwr; +#ifdef __LITTLE_ENDIAN u8 ex_intf_flag:1; u8 sgi_en:1; u8 rxsc:2; u8 reserve:4; +#else + u8 reserve:4; + u8 rxsc:2; + u8 sgi_en:1; + u8 ex_intf_flag:1; +#endif } __packed; struct tx_desc_92d { @@ -488,64 +509,6 @@ struct tx_desc_92d { u32 reserve_pass_pcie_mm_limit[4]; } __packed; -struct rx_desc_92d { - u32 length:14; - u32 crc32:1; - u32 icverror:1; - u32 drv_infosize:4; - u32 security:3; - u32 qos:1; - u32 shift:2; - u32 phystatus:1; - u32 swdec:1; - u32 lastseg:1; - u32 firstseg:1; - u32 eor:1; - u32 own:1; - - u32 macid:5; - u32 tid:4; - u32 hwrsvd:5; - u32 paggr:1; - u32 faggr:1; - u32 a1_fit:4; - u32 a2_fit:4; - u32 pam:1; - u32 pwr:1; - u32 moredata:1; - u32 morefrag:1; - u32 type:2; - u32 mc:1; - u32 bc:1; - - u32 seq:12; - u32 frag:4; - u32 nextpktlen:14; - u32 nextind:1; - u32 rsvd:1; - - u32 rxmcs:6; - u32 rxht:1; - u32 amsdu:1; - u32 splcp:1; - u32 bandwidth:1; - u32 htc:1; - u32 tcpchk_rpt:1; - u32 ipcchk_rpt:1; - u32 tcpchk_valid:1; - u32 hwpcerr:1; - u32 hwpcind:1; - u32 iv0:16; - - u32 iv1; - - u32 tsfl; - - u32 bufferaddress; - u32 bufferaddress64; - -} __packed; - void rtl92de_tx_fill_desc(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr, u8 *pdesc, u8 *pbd_desc_tx, struct ieee80211_tx_info *info, diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 6e73d3a00e..8b4be2e4d1 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -287,6 +287,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) return NULL; } skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); + skb_mark_for_recycle(skb); /* Align ip header to a 16 bytes boundary */ skb_reserve(skb, NET_IP_ALIGN); diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 
8890fcd59c..cfc2a7e652 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -424,7 +424,8 @@ struct trf7970a { enum trf7970a_state state; struct device *dev; struct spi_device *spi; - struct regulator *regulator; + struct regulator *vin_regulator; + struct regulator *vddio_regulator; struct nfc_digital_dev *ddev; u32 quirks; bool is_initiator; @@ -1883,7 +1884,7 @@ static int trf7970a_power_up(struct trf7970a *trf) if (trf->state != TRF7970A_ST_PWR_OFF) return 0; - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "%s - Can't enable VIN: %d\n", __func__, ret); return ret; @@ -1926,7 +1927,7 @@ static int trf7970a_power_down(struct trf7970a *trf) if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) gpiod_set_value_cansleep(trf->en2_gpiod, 0); - ret = regulator_disable(trf->regulator); + ret = regulator_disable(trf->vin_regulator); if (ret) dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__, ret); @@ -2065,37 +2066,37 @@ static int trf7970a_probe(struct spi_device *spi) mutex_init(&trf->lock); INIT_DELAYED_WORK(&trf->timeout_work, trf7970a_timeout_work_handler); - trf->regulator = devm_regulator_get(&spi->dev, "vin"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vin_regulator = devm_regulator_get(&spi->dev, "vin"); + if (IS_ERR(trf->vin_regulator)) { + ret = PTR_ERR(trf->vin_regulator); dev_err(trf->dev, "Can't get VIN regulator: %d\n", ret); goto err_destroy_lock; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "Can't enable VIN: %d\n", ret); goto err_destroy_lock; } - uvolts = regulator_get_voltage(trf->regulator); + uvolts = regulator_get_voltage(trf->vin_regulator); if (uvolts > 4000000) trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3; - trf->regulator = devm_regulator_get(&spi->dev, "vdd-io"); - if (IS_ERR(trf->regulator)) { - ret = 
PTR_ERR(trf->regulator); + trf->vddio_regulator = devm_regulator_get(&spi->dev, "vdd-io"); + if (IS_ERR(trf->vddio_regulator)) { + ret = PTR_ERR(trf->vddio_regulator); dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vddio_regulator); if (ret) { dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - if (regulator_get_voltage(trf->regulator) == 1800000) { + if (regulator_get_voltage(trf->vddio_regulator) == 1800000) { trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW; dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n"); } @@ -2108,7 +2109,7 @@ static int trf7970a_probe(struct spi_device *spi) if (!trf->ddev) { dev_err(trf->dev, "Can't allocate NFC digital device\n"); ret = -ENOMEM; - goto err_disable_regulator; + goto err_disable_vddio_regulator; } nfc_digital_set_parent_dev(trf->ddev, trf->dev); @@ -2137,8 +2138,10 @@ static int trf7970a_probe(struct spi_device *spi) trf7970a_shutdown(trf); err_free_ddev: nfc_digital_free_device(trf->ddev); -err_disable_regulator: - regulator_disable(trf->regulator); +err_disable_vddio_regulator: + regulator_disable(trf->vddio_regulator); +err_disable_vin_regulator: + regulator_disable(trf->vin_regulator); err_destroy_lock: mutex_destroy(&trf->lock); return ret; @@ -2157,7 +2160,8 @@ static int trf7970a_remove(struct spi_device *spi) nfc_digital_unregister_device(trf->ddev); nfc_digital_free_device(trf->ddev); - regulator_disable(trf->regulator); + regulator_disable(trf->vddio_regulator); + regulator_disable(trf->vin_regulator); mutex_destroy(&trf->lock); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8f06e5c170..960a31e330 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1185,7 +1185,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -static 
void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +static void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { @@ -1201,6 +1201,8 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); } + if (ns) + return; switch (cmd->common.opcode) { case nvme_admin_set_features: @@ -1235,7 +1237,7 @@ int nvme_execute_passthru_rq(struct request *rq) effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); ret = nvme_execute_rq(disk, rq, false); if (effects) /* nothing to be done for zero cmd effects */ - nvme_passthru_end(ctrl, effects, cmd, ret); + nvme_passthru_end(ctrl, ns, effects, cmd, ret); return ret; } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 73eddb67f0..f8ad43b5f0 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -190,7 +190,8 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) if (nvme_path_is_disabled(ns)) continue; - if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) + if (ns->ctrl->numa_node != NUMA_NO_NODE && + READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) distance = node_distance(node, ns->ctrl->numa_node); else distance = LOCAL_DISTANCE; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fd20f3fdb1..7bb74112fe 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3339,6 +3339,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_BOGUS_NID, }, { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 
0x0100), /* Seagate Nytro Flash Storage */ diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 5bdc3ba51f..a3d3a1bfd2 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -530,10 +530,18 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item, if (strtobool(page, &enable)) return -EINVAL; + /* + * take a global nvmet_config_sem because the disable routine has a + * window where it releases the subsys-lock, giving a chance to + * a parallel enable to concurrently execute causing the disable to + * have a misaccounting of the ns percpu_ref. + */ + down_write(&nvmet_config_sem); if (enable) ret = nvmet_ns_enable(ns); else nvmet_ns_disable(ns); + up_write(&nvmet_config_sem); return ret ? ret : count; } diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c index d6b533497c..ba2714bef8 100644 --- a/drivers/nvmem/meson-efuse.c +++ b/drivers/nvmem/meson-efuse.c @@ -47,7 +47,6 @@ static int meson_efuse_probe(struct platform_device *pdev) struct nvmem_config *econfig; struct clk *clk; unsigned int size; - int ret; sm_np = of_parse_phandle(pdev->dev.of_node, "secure-monitor", 0); if (!sm_np) { @@ -60,27 +59,9 @@ static int meson_efuse_probe(struct platform_device *pdev) if (!fw) return -EPROBE_DEFER; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to get efuse gate"); - return ret; - } - - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(dev, "failed to enable gate"); - return ret; - } - - ret = devm_add_action_or_reset(dev, - (void(*)(void *))clk_disable_unprepare, - clk); - if (ret) { - dev_err(dev, "failed to add disable callback"); - return ret; - } + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to get efuse gate"); if (meson_sm_call(fw, SM_EFUSE_USER_MAX, &size, 0, 0, 0, 0, 0) < 0) { dev_err(dev, "failed to get max user"); diff --git 
a/drivers/of/dynamic.c b/drivers/of/dynamic.c index ffb7b0446a..a6358889fd 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "OF: " fmt +#include #include #include #include @@ -678,6 +679,17 @@ void of_changeset_destroy(struct of_changeset *ocs) { struct of_changeset_entry *ce, *cen; + /* + * When a device is deleted, the device links to/from it are also queued + * for deletion. Until these device links are freed, the devices + * themselves aren't freed. If the device being deleted is due to an + * overlay change, this device might be holding a reference to a device + * node that will be freed. So, wait until all already pending device + * links are deleted before freeing a device node. This ensures we don't + * free any device node that has a non-zero reference count. + */ + device_link_wait_removal(); + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) __of_changeset_entry_destroy(ce); } diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index fcb1fdb22f..fc92d30a0a 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -658,8 +658,13 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) nbars = (reg & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; + /* + * PCIe r6.0, sec 7.8.6.2 require us to support at least one + * size in the range from 1 MB to 512 GB. Advertise support + * for 1 MB BAR size only. 
+ */ for (i = 0; i < nbars; i++, offset += PCI_REBAR_CTRL) - dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); + dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, BIT(4)); } dw_pcie_setup(pci); diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index 2f82da76e3..3703ea0d90 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -2142,10 +2142,13 @@ static int tegra_pcie_dw_probe(struct platform_device *pdev) ret = tegra_pcie_config_ep(pcie, pdev); if (ret < 0) goto fail; + else + return 0; break; default: dev_err(dev, "Invalid PCIe device type %d\n", pcie->mode); + ret = -EINVAL; } fail: diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f44c0667a8..46b82bacaf 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -459,16 +459,21 @@ static void pci_device_remove(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *drv = pci_dev->driver; - if (drv) { - if (drv->remove) { - pm_runtime_get_sync(dev); - drv->remove(pci_dev); - pm_runtime_put_noidle(dev); - } - pcibios_free_irq(pci_dev); - pci_dev->driver = NULL; - pci_iov_remove(pci_dev); + if (drv->remove) { + pm_runtime_get_sync(dev); + /* + * If the driver provides a .runtime_idle() callback and it has + * started to run already, it may continue to run in parallel + * with the code below, so wait until all of the runtime PM + * activity has completed. 
+ */ + pm_runtime_barrier(dev); + drv->remove(pci_dev); + pm_runtime_put_noidle(dev); } + pcibios_free_irq(pci_dev); + pci_dev->driver = NULL; + pci_iov_remove(pci_dev); /* Undo the runtime PM settings in local_pci_probe() */ pm_runtime_put_sync(dev); diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index cf0d4ba2e1..ab83f78f3e 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -335,11 +335,16 @@ void pci_dpc_init(struct pci_dev *pdev) return; pdev->dpc_rp_extensions = true; - pdev->dpc_rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; - if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { - pci_err(pdev, "RP PIO log size %u is invalid\n", - pdev->dpc_rp_log_size); - pdev->dpc_rp_log_size = 0; + + /* Quirks may set dpc_rp_log_size if device or firmware is buggy */ + if (!pdev->dpc_rp_log_size) { + pdev->dpc_rp_log_size = + (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { + pci_err(pdev, "RP PIO log size %u is invalid\n", + pdev->dpc_rp_log_size); + pdev->dpc_rp_log_size = 0; + } } } diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c index 87734e4c3c..3521000760 100644 --- a/drivers/pci/pcie/edr.c +++ b/drivers/pci/pcie/edr.c @@ -32,10 +32,10 @@ static int acpi_enable_dpc(struct pci_dev *pdev) int status = 0; /* - * Behavior when calling unsupported _DSM functions is undefined, - * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + * Per PCI Firmware r3.3, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is + * optional. Return success if it's not implemented. 
*/ - if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 6, 1ULL << EDR_PORT_DPC_ENABLE_DSM)) return 0; @@ -46,12 +46,7 @@ static int acpi_enable_dpc(struct pci_dev *pdev) argv4.package.count = 1; argv4.package.elements = &req; - /* - * Per Downstream Port Containment Related Enhancements ECN to PCI - * Firmware Specification r3.2, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is - * optional. Return success if it's not implemented. - */ - obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 6, EDR_PORT_DPC_ENABLE_DSM, &argv4); if (!obj) return 0; @@ -85,8 +80,9 @@ static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev) u16 port; /* - * Behavior when calling unsupported _DSM functions is undefined, - * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + * If EDR_PORT_LOCATE_DSM is not implemented under the target of + * EDR, the target is the port that experienced the containment + * event (PCI Firmware r3.3, sec 4.6.13). */ if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, 1ULL << EDR_PORT_LOCATE_DSM)) @@ -103,6 +99,16 @@ static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev) return NULL; } + /* + * Bit 31 represents the success/failure of the operation. If bit + * 31 is set, the operation failed. 
+ */ + if (obj->integer.value & BIT(31)) { + ACPI_FREE(obj); + pci_err(pdev, "Locate Port _DSM failed\n"); + return NULL; + } + /* * Firmware returns DPC port BDF details in following format: * 15:8 = bus diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index b576aa890c..410fc44d79 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -13,6 +13,7 @@ #define dev_fmt(fmt) "AER: " fmt #include +#include #include #include #include @@ -79,6 +80,18 @@ static int report_error_detected(struct pci_dev *dev, return 0; } +static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data) +{ + pm_runtime_get_sync(&pdev->dev); + return 0; +} + +static int pci_pm_runtime_put(struct pci_dev *pdev, void *data) +{ + pm_runtime_put(&pdev->dev); + return 0; +} + static int report_frozen_detected(struct pci_dev *dev, void *data) { return report_error_detected(dev, pci_channel_io_frozen, data); @@ -195,6 +208,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, else bridge = pci_upstream_bridge(dev); + pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL); + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); @@ -239,10 +254,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); } + + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_info(bridge, "device recovery successful\n"); return status; failed: + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1b6484c906..4d4267105c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -12,6 +12,7 @@ * file, where their drivers can use them. 
*/ +#include #include #include #include @@ -5893,3 +5894,102 @@ static void nvidia_ion_ahci_fixup(struct pci_dev *pdev) pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING; } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup); + +static void rom_bar_overlap_defect(struct pci_dev *dev) +{ + pci_info(dev, "working around ROM BAR overlap defect\n"); + dev->rom_bar_overlap = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1533, rom_bar_overlap_defect); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1536, rom_bar_overlap_defect); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1537, rom_bar_overlap_defect); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1538, rom_bar_overlap_defect); + +#ifdef CONFIG_PCIEASPM +/* + * Several Intel DG2 graphics devices advertise that they can only tolerate + * 1us latency when transitioning from L1 to L0, which may prevent ASPM L1 + * from being enabled. But in fact these devices can tolerate unlimited + * latency. Override their Device Capabilities value to allow ASPM L1 to + * be enabled. 
+ */ +static void aspm_l1_acceptable_latency(struct pci_dev *dev) +{ + u32 l1_lat = FIELD_GET(PCI_EXP_DEVCAP_L1, dev->devcap); + + if (l1_lat < 7) { + dev->devcap |= FIELD_PREP(PCI_EXP_DEVCAP_L1, 7); + pci_info(dev, "ASPM: overriding L1 acceptable latency from %#x to 0x7\n", + l1_lat); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f80, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f81, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f82, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f83, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f84, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f85, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f86, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f87, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x4f88, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5690, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5691, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5692, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5693, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5694, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5695, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a0, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a1, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a2, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a3, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a4, aspm_l1_acceptable_latency); 
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a5, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56a6, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56b0, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56b1, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56c0, aspm_l1_acceptable_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56c1, aspm_l1_acceptable_latency); +#endif + +#ifdef CONFIG_PCIE_DPC +/* + * Intel Ice Lake, Tiger Lake and Alder Lake BIOS has a bug that clears + * the DPC RP PIO Log Size of the integrated Thunderbolt PCIe Root + * Ports. + */ +static void dpc_log_size(struct pci_dev *dev) +{ + u16 dpc, val; + + dpc = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC); + if (!dpc) + return; + + pci_read_config_word(dev, dpc + PCI_EXP_DPC_CAP, &val); + if (!(val & PCI_EXP_DPC_CAP_RP_EXT)) + return; + + if (!((val & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8)) { + pci_info(dev, "Overriding RP PIO Log Size to 4\n"); + dev->dpc_rp_log_size = 4; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x461f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x462f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x463f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x466e, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a1d, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a1f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a21, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a23, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a23, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a25, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a27, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a29, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2b, 
dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa73f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa76e, dpc_log_size); +#endif diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 875d50c16f..b492e67c3d 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -75,12 +75,16 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) * as zero when disabled, so don't update ROM BARs unless * they're enabled. See * https://lore.kernel.org/r/43147B3D.1030309@vc.cvut.cz/ + * But we must update ROM BAR for buggy devices where even a + * disabled ROM can conflict with other BARs. */ - if (!(res->flags & IORESOURCE_ROM_ENABLE)) + if (!(res->flags & IORESOURCE_ROM_ENABLE) && + !dev->rom_bar_overlap) return; reg = dev->rom_base_reg; - new |= PCI_ROM_ADDRESS_ENABLE; + if (res->flags & IORESOURCE_ROM_ENABLE) + new |= PCI_ROM_ADDRESS_ENABLE; } else return; diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 119e2c0392..bf7706bf10 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -1485,6 +1485,19 @@ int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, } EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_usb3_companion); +int tegra_xusb_padctl_get_port_number(struct phy *phy) +{ + struct tegra_xusb_lane *lane; + + if (!phy) + return -ENODEV; + + lane = phy_get_drvdata(phy); + + return lane->index; +} +EXPORT_SYMBOL_GPL(tegra_xusb_padctl_get_port_number); + MODULE_AUTHOR("Thierry Reding "); MODULE_DESCRIPTION("Tegra XUSB Pad Controller driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 54064714d7..b223583dfb 100644 --- 
a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -43,7 +43,7 @@ #define SCU614 0x614 /* Disable GPIO Internal Pull-Down #1 */ #define SCU618 0x618 /* Disable GPIO Internal Pull-Down #2 */ #define SCU61C 0x61c /* Disable GPIO Internal Pull-Down #3 */ -#define SCU620 0x620 /* Disable GPIO Internal Pull-Down #4 */ +#define SCU630 0x630 /* Disable GPIO Internal Pull-Down #4 */ #define SCU634 0x634 /* Disable GPIO Internal Pull-Down #5 */ #define SCU638 0x638 /* Disable GPIO Internal Pull-Down #6 */ #define SCU690 0x690 /* Multi-function Pin Control #24 */ @@ -2487,38 +2487,38 @@ static struct aspeed_pin_config aspeed_g6_configs[] = { ASPEED_PULL_DOWN_PINCONF(D14, SCU61C, 0), /* GPIOS7 */ - ASPEED_PULL_DOWN_PINCONF(T24, SCU620, 23), + ASPEED_PULL_DOWN_PINCONF(T24, SCU630, 23), /* GPIOS6 */ - ASPEED_PULL_DOWN_PINCONF(P23, SCU620, 22), + ASPEED_PULL_DOWN_PINCONF(P23, SCU630, 22), /* GPIOS5 */ - ASPEED_PULL_DOWN_PINCONF(P24, SCU620, 21), + ASPEED_PULL_DOWN_PINCONF(P24, SCU630, 21), /* GPIOS4 */ - ASPEED_PULL_DOWN_PINCONF(R26, SCU620, 20), + ASPEED_PULL_DOWN_PINCONF(R26, SCU630, 20), /* GPIOS3*/ - ASPEED_PULL_DOWN_PINCONF(R24, SCU620, 19), + ASPEED_PULL_DOWN_PINCONF(R24, SCU630, 19), /* GPIOS2 */ - ASPEED_PULL_DOWN_PINCONF(T26, SCU620, 18), + ASPEED_PULL_DOWN_PINCONF(T26, SCU630, 18), /* GPIOS1 */ - ASPEED_PULL_DOWN_PINCONF(T25, SCU620, 17), + ASPEED_PULL_DOWN_PINCONF(T25, SCU630, 17), /* GPIOS0 */ - ASPEED_PULL_DOWN_PINCONF(R23, SCU620, 16), + ASPEED_PULL_DOWN_PINCONF(R23, SCU630, 16), /* GPIOR7 */ - ASPEED_PULL_DOWN_PINCONF(U26, SCU620, 15), + ASPEED_PULL_DOWN_PINCONF(U26, SCU630, 15), /* GPIOR6 */ - ASPEED_PULL_DOWN_PINCONF(W26, SCU620, 14), + ASPEED_PULL_DOWN_PINCONF(W26, SCU630, 14), /* GPIOR5 */ - ASPEED_PULL_DOWN_PINCONF(T23, SCU620, 13), + ASPEED_PULL_DOWN_PINCONF(T23, SCU630, 13), /* GPIOR4 */ - ASPEED_PULL_DOWN_PINCONF(U25, SCU620, 12), + ASPEED_PULL_DOWN_PINCONF(U25, SCU630, 12), /* GPIOR3*/ - 
ASPEED_PULL_DOWN_PINCONF(V26, SCU620, 11), + ASPEED_PULL_DOWN_PINCONF(V26, SCU630, 11), /* GPIOR2 */ - ASPEED_PULL_DOWN_PINCONF(V24, SCU620, 10), + ASPEED_PULL_DOWN_PINCONF(V24, SCU630, 10), /* GPIOR1 */ - ASPEED_PULL_DOWN_PINCONF(U24, SCU620, 9), + ASPEED_PULL_DOWN_PINCONF(U24, SCU630, 9), /* GPIOR0 */ - ASPEED_PULL_DOWN_PINCONF(V25, SCU620, 8), + ASPEED_PULL_DOWN_PINCONF(V25, SCU630, 8), /* GPIOX7 */ ASPEED_PULL_DOWN_PINCONF(AB10, SCU634, 31), diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 456b72041c..46a06067e9 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -205,6 +205,7 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, const struct pinctrl_pin_desc *pin) { struct pin_desc *pindesc; + int error; pindesc = pin_desc_get(pctldev, pin->number); if (pindesc) { @@ -226,18 +227,25 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, } else { pindesc->name = kasprintf(GFP_KERNEL, "PIN%u", pin->number); if (!pindesc->name) { - kfree(pindesc); - return -ENOMEM; + error = -ENOMEM; + goto failed; } pindesc->dynamic_name = true; } pindesc->drv_data = pin->drv_data; - radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); + error = radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); + if (error) + goto failed; + pr_debug("registered pin %d (%s) on %s\n", pin->number, pindesc->name, pctldev->desc->name); return 0; + +failed: + kfree(pindesc); + return error; } static int pinctrl_register_pins(struct pinctrl_dev *pctldev, @@ -2098,13 +2106,7 @@ int pinctrl_enable(struct pinctrl_dev *pctldev) error = pinctrl_claim_hogs(pctldev); if (error) { - dev_err(pctldev->dev, "could not claim hogs: %i\n", - error); - pinctrl_free_pindescs(pctldev, pctldev->desc->pins, - pctldev->desc->npins); - mutex_destroy(&pctldev->mutex); - kfree(pctldev); - + dev_err(pctldev->dev, "could not claim hogs: %i\n", error); return error; } diff --git a/drivers/pinctrl/devicetree.c 
b/drivers/pinctrl/devicetree.c index eac55fee52..0220228c50 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -220,14 +220,16 @@ int pinctrl_dt_to_map(struct pinctrl *p, struct pinctrl_dev *pctldev) for (state = 0; ; state++) { /* Retrieve the pinctrl-* property */ propname = kasprintf(GFP_KERNEL, "pinctrl-%d", state); - if (!propname) - return -ENOMEM; + if (!propname) { + ret = -ENOMEM; + goto err; + } prop = of_find_property(np, propname, &size); kfree(propname); if (!prop) { if (state == 0) { - of_node_put(np); - return -ENODEV; + ret = -ENODEV; + goto err; } break; } diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index 0fa1c36148..deade01027 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -79,78 +79,76 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); u32 param = pinconf_to_config_param(*config); - int pullup, err, reg, ret = 1; + int pullup, reg, err = -ENOTSUPP, ret = 1; const struct mtk_pin_desc *desc; - if (pin >= hw->soc->npins) { - err = -EINVAL; - goto out; - } + if (pin >= hw->soc->npins) + return -EINVAL; + desc = (const struct mtk_pin_desc *)&hw->soc->pins[pin]; switch (param) { case PIN_CONFIG_BIAS_DISABLE: case PIN_CONFIG_BIAS_PULL_UP: case PIN_CONFIG_BIAS_PULL_DOWN: - if (hw->soc->bias_get_combo) { - err = hw->soc->bias_get_combo(hw, desc, &pullup, &ret); - if (err) - goto out; - if (ret == MTK_PUPD_SET_R1R0_00) - ret = MTK_DISABLE; - if (param == PIN_CONFIG_BIAS_DISABLE) { - if (ret != MTK_DISABLE) - err = -EINVAL; - } else if (param == PIN_CONFIG_BIAS_PULL_UP) { - if (!pullup || ret == MTK_DISABLE) - err = -EINVAL; - } else if (param == PIN_CONFIG_BIAS_PULL_DOWN) { - if (pullup || ret == MTK_DISABLE) - err = -EINVAL; - } - } else { - err = -ENOTSUPP; + if (!hw->soc->bias_get_combo) + break; + err = hw->soc->bias_get_combo(hw, desc, &pullup, 
&ret); + if (err) + break; + if (ret == MTK_PUPD_SET_R1R0_00) + ret = MTK_DISABLE; + if (param == PIN_CONFIG_BIAS_DISABLE) { + if (ret != MTK_DISABLE) + err = -EINVAL; + } else if (param == PIN_CONFIG_BIAS_PULL_UP) { + if (!pullup || ret == MTK_DISABLE) + err = -EINVAL; + } else if (param == PIN_CONFIG_BIAS_PULL_DOWN) { + if (pullup || ret == MTK_DISABLE) + err = -EINVAL; } break; case PIN_CONFIG_SLEW_RATE: err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_SR, &ret); break; case PIN_CONFIG_INPUT_ENABLE: - case PIN_CONFIG_OUTPUT_ENABLE: + err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_IES, &ret); + if (!ret) + err = -EINVAL; + break; + case PIN_CONFIG_OUTPUT: err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DIR, &ret); if (err) - goto out; - /* CONFIG Current direction return value - * ------------- ----------------- ---------------------- - * OUTPUT_ENABLE output 1 (= HW value) - * input 0 (= HW value) - * INPUT_ENABLE output 0 (= reverse HW value) - * input 1 (= reverse HW value) - */ - if (param == PIN_CONFIG_INPUT_ENABLE) - ret = !ret; + break; + + if (!ret) { + err = -EINVAL; + break; + } + err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DO, &ret); break; case PIN_CONFIG_INPUT_SCHMITT_ENABLE: err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DIR, &ret); if (err) - goto out; + break; /* return error when in output mode * because schmitt trigger only work in input mode */ if (ret) { err = -EINVAL; - goto out; + break; } err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_SMT, &ret); - + if (!ret) + err = -EINVAL; break; case PIN_CONFIG_DRIVE_STRENGTH: - if (hw->soc->drive_get) - err = hw->soc->drive_get(hw, desc, &ret); - else - err = -ENOTSUPP; + if (!hw->soc->drive_get) + break; + err = hw->soc->drive_get(hw, desc, &ret); break; case MTK_PIN_CONFIG_TDSEL: case MTK_PIN_CONFIG_RDSEL: @@ -160,23 +158,18 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, break; case MTK_PIN_CONFIG_PU_ADV: case MTK_PIN_CONFIG_PD_ADV: - if (hw->soc->adv_pull_get) { - pullup = 
param == MTK_PIN_CONFIG_PU_ADV; - err = hw->soc->adv_pull_get(hw, desc, pullup, &ret); - } else - err = -ENOTSUPP; + if (!hw->soc->adv_pull_get) + break; + pullup = param == MTK_PIN_CONFIG_PU_ADV; + err = hw->soc->adv_pull_get(hw, desc, pullup, &ret); break; case MTK_PIN_CONFIG_DRV_ADV: - if (hw->soc->adv_drive_get) - err = hw->soc->adv_drive_get(hw, desc, &ret); - else - err = -ENOTSUPP; + if (!hw->soc->adv_drive_get) + break; + err = hw->soc->adv_drive_get(hw, desc, &ret); break; - default: - err = -ENOTSUPP; } -out: if (!err) *config = pinconf_to_config_packed(param, ret); @@ -188,54 +181,33 @@ static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); const struct mtk_pin_desc *desc; - int err = 0; + int err = -ENOTSUPP; u32 reg; - if (pin >= hw->soc->npins) { - err = -EINVAL; - goto err; - } + if (pin >= hw->soc->npins) + return -EINVAL; + desc = (const struct mtk_pin_desc *)&hw->soc->pins[pin]; switch ((u32)param) { case PIN_CONFIG_BIAS_DISABLE: - if (hw->soc->bias_set_combo) - err = hw->soc->bias_set_combo(hw, desc, 0, MTK_DISABLE); - else - err = -ENOTSUPP; + if (!hw->soc->bias_set_combo) + break; + err = hw->soc->bias_set_combo(hw, desc, 0, MTK_DISABLE); break; case PIN_CONFIG_BIAS_PULL_UP: - if (hw->soc->bias_set_combo) - err = hw->soc->bias_set_combo(hw, desc, 1, arg); - else - err = -ENOTSUPP; + if (!hw->soc->bias_set_combo) + break; + err = hw->soc->bias_set_combo(hw, desc, 1, arg); break; case PIN_CONFIG_BIAS_PULL_DOWN: - if (hw->soc->bias_set_combo) - err = hw->soc->bias_set_combo(hw, desc, 0, arg); - else - err = -ENOTSUPP; - break; - case PIN_CONFIG_OUTPUT_ENABLE: - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_SMT, - MTK_DISABLE); - /* Keep set direction to consider the case that a GPIO pin - * does not have SMT control - */ - if (err != -ENOTSUPP) - goto err; - - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, - MTK_OUTPUT); + if (!hw->soc->bias_set_combo) + 
break; + err = hw->soc->bias_set_combo(hw, desc, 0, arg); break; case PIN_CONFIG_INPUT_ENABLE: /* regard all non-zero value as enable */ err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_IES, !!arg); - if (err) - goto err; - - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, - MTK_INPUT); break; case PIN_CONFIG_SLEW_RATE: /* regard all non-zero value as enable */ @@ -245,7 +217,7 @@ static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DO, arg); if (err) - goto err; + break; err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, MTK_OUTPUT); @@ -257,15 +229,14 @@ static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, */ err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, !arg); if (err) - goto err; + break; err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_SMT, !!arg); break; case PIN_CONFIG_DRIVE_STRENGTH: - if (hw->soc->drive_set) - err = hw->soc->drive_set(hw, desc, arg); - else - err = -ENOTSUPP; + if (!hw->soc->drive_set) + break; + err = hw->soc->drive_set(hw, desc, arg); break; case MTK_PIN_CONFIG_TDSEL: case MTK_PIN_CONFIG_RDSEL: @@ -275,26 +246,19 @@ static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; case MTK_PIN_CONFIG_PU_ADV: case MTK_PIN_CONFIG_PD_ADV: - if (hw->soc->adv_pull_set) { - bool pullup; - - pullup = param == MTK_PIN_CONFIG_PU_ADV; - err = hw->soc->adv_pull_set(hw, desc, pullup, - arg); - } else - err = -ENOTSUPP; + if (!hw->soc->adv_pull_set) + break; + err = hw->soc->adv_pull_set(hw, desc, + (param == MTK_PIN_CONFIG_PU_ADV), + arg); break; case MTK_PIN_CONFIG_DRV_ADV: - if (hw->soc->adv_drive_set) - err = hw->soc->adv_drive_set(hw, desc, arg); - else - err = -ENOTSUPP; + if (!hw->soc->adv_drive_set) + break; + err = hw->soc->adv_drive_set(hw, desc, arg); break; - default: - err = -ENOTSUPP; } -err: return err; } diff --git a/drivers/pinctrl/meson/pinctrl-meson-a1.c b/drivers/pinctrl/meson/pinctrl-meson-a1.c index 
79f5d753d7..50a87d9618 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-a1.c +++ b/drivers/pinctrl/meson/pinctrl-meson-a1.c @@ -250,7 +250,7 @@ static const unsigned int pdm_dclk_x_pins[] = { GPIOX_10 }; static const unsigned int pdm_din2_a_pins[] = { GPIOA_6 }; static const unsigned int pdm_din1_a_pins[] = { GPIOA_7 }; static const unsigned int pdm_din0_a_pins[] = { GPIOA_8 }; -static const unsigned int pdm_dclk_pins[] = { GPIOA_9 }; +static const unsigned int pdm_dclk_a_pins[] = { GPIOA_9 }; /* gen_clk */ static const unsigned int gen_clk_x_pins[] = { GPIOX_7 }; @@ -591,7 +591,7 @@ static struct meson_pmx_group meson_a1_periphs_groups[] = { GROUP(pdm_din2_a, 3), GROUP(pdm_din1_a, 3), GROUP(pdm_din0_a, 3), - GROUP(pdm_dclk, 3), + GROUP(pdm_dclk_a, 3), GROUP(pwm_c_a, 3), GROUP(pwm_b_a, 3), @@ -755,7 +755,7 @@ static const char * const spi_a_groups[] = { static const char * const pdm_groups[] = { "pdm_din0_x", "pdm_din1_x", "pdm_din2_x", "pdm_dclk_x", "pdm_din2_a", - "pdm_din1_a", "pdm_din0_a", "pdm_dclk", + "pdm_din1_a", "pdm_din0_a", "pdm_dclk_a", }; static const char * const gen_clk_groups[] = { diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c index 75fc420b6b..8d3b75231f 100644 --- a/drivers/pinctrl/renesas/core.c +++ b/drivers/pinctrl/renesas/core.c @@ -868,9 +868,11 @@ static void __init sh_pfc_check_cfg_reg(const char *drvname, sh_pfc_err("reg 0x%x: var_field_width declares %u instead of %u bits\n", cfg_reg->reg, rw, cfg_reg->reg_width); - if (n != cfg_reg->nr_enum_ids) + if (n != cfg_reg->nr_enum_ids) { sh_pfc_err("reg 0x%x: enum_ids[] has %u instead of %u values\n", cfg_reg->reg, cfg_reg->nr_enum_ids, n); + n = cfg_reg->nr_enum_ids; + } check_enum_ids: sh_pfc_check_reg_enums(drvname, cfg_reg->reg, cfg_reg->enum_ids, n); diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 6fb3e597c5..4e9d3f25c3 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -136,8 
+136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device) priv->switches_dev->id.bustype = BUS_HOST; if (priv->has_switches) { - detect_tablet_mode(&device->dev); - ret = input_register_device(priv->switches_dev); if (ret) return ret; @@ -316,6 +314,9 @@ static int intel_vbtn_probe(struct platform_device *device) if (ACPI_FAILURE(status)) dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status); } + // Check switches after buttons since VBDL may have side effects. + if (has_switches) + detect_tablet_mode(&device->dev); device_init_wakeup(&device->dev, true); /* diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 08a9b802be..161bd19441 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -1153,6 +1153,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BIOS_VERSION, "CHUWI.D86JLBNR"), }, }, + { + /* Chuwi Vi8 dual-boot (CWI506) */ + .driver_data = (void *)&chuwi_vi8_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "i86"), + DMI_MATCH(DMI_BIOS_VERSION, "CHUWI2.D86JHBNR02"), + }, + }, { /* Chuwi Vi8 Plus (CWI519) */ .driver_data = (void *)&chuwi_vi8_plus_data, diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c index f1248faf59..383bf19819 100644 --- a/drivers/power/supply/mt6360_charger.c +++ b/drivers/power/supply/mt6360_charger.c @@ -591,7 +591,7 @@ static const struct regulator_ops mt6360_chg_otg_ops = { }; static const struct regulator_desc mt6360_otg_rdesc = { - .of_match = "usb-otg-vbus", + .of_match = "usb-otg-vbus-regulator", .name = "usb-otg-vbus", .ops = &mt6360_chg_otg_ops, .owner = THIS_MODULE, diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c index 594bb3b8a4..a84afccd50 100644 --- a/drivers/power/supply/rt9455_charger.c +++ b/drivers/power/supply/rt9455_charger.c @@ -193,6 +193,7 @@ 
static const int rt9455_voreg_values[] = { 4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000 }; +#if IS_ENABLED(CONFIG_USB_PHY) /* * When the charger is in boost mode, REG02[7:2] represent boost output * voltage. @@ -208,6 +209,7 @@ static const int rt9455_boost_voltage_values[] = { 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, }; +#endif /* REG07[3:0] (VMREG) in uV */ static const int rt9455_vmreg_values[] = { diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index 0a7920cbd4..a0467f0b54 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -571,6 +571,7 @@ static int sti_pwm_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct sti_pwm_compat_data *cdata; + struct pwm_chip *chip; struct sti_pwm_chip *pc; unsigned int i; int irq, ret; @@ -578,6 +579,7 @@ static int sti_pwm_probe(struct platform_device *pdev) pc = devm_kzalloc(dev, sizeof(*pc), GFP_KERNEL); if (!pc) return -ENOMEM; + chip = &pc->chip; cdata = devm_kzalloc(dev, sizeof(*cdata), GFP_KERNEL); if (!cdata) @@ -623,40 +625,28 @@ static int sti_pwm_probe(struct platform_device *pdev) return ret; if (cdata->pwm_num_devs) { - pc->pwm_clk = of_clk_get_by_name(dev->of_node, "pwm"); + pc->pwm_clk = devm_clk_get_prepared(dev, "pwm"); if (IS_ERR(pc->pwm_clk)) { dev_err(dev, "failed to get PWM clock\n"); return PTR_ERR(pc->pwm_clk); } - - ret = clk_prepare(pc->pwm_clk); - if (ret) { - dev_err(dev, "failed to prepare clock\n"); - return ret; - } } if (cdata->cpt_num_devs) { - pc->cpt_clk = of_clk_get_by_name(dev->of_node, "capture"); + pc->cpt_clk = devm_clk_get_prepared(dev, "capture"); if (IS_ERR(pc->cpt_clk)) { dev_err(dev, "failed to get PWM capture clock\n"); return PTR_ERR(pc->cpt_clk); } - ret = clk_prepare(pc->cpt_clk); - if (ret) { - dev_err(dev, "failed to prepare clock\n"); - return ret; - } - cdata->ddata = devm_kzalloc(dev, 
cdata->cpt_num_devs * sizeof(*cdata->ddata), GFP_KERNEL); if (!cdata->ddata) return -ENOMEM; } - pc->chip.dev = dev; - pc->chip.ops = &sti_pwm_ops; - pc->chip.npwm = max(cdata->pwm_num_devs, cdata->cpt_num_devs); + chip->dev = dev; + chip->ops = &sti_pwm_ops; + chip->npwm = max(cdata->pwm_num_devs, cdata->cpt_num_devs); for (i = 0; i < cdata->cpt_num_devs; i++) { struct sti_cpt_ddata *ddata = &cdata->ddata[i]; @@ -665,28 +655,7 @@ static int sti_pwm_probe(struct platform_device *pdev) mutex_init(&ddata->lock); } - ret = pwmchip_add(&pc->chip); - if (ret < 0) { - clk_unprepare(pc->pwm_clk); - clk_unprepare(pc->cpt_clk); - return ret; - } - - platform_set_drvdata(pdev, pc); - - return 0; -} - -static int sti_pwm_remove(struct platform_device *pdev) -{ - struct sti_pwm_chip *pc = platform_get_drvdata(pdev); - - pwmchip_remove(&pc->chip); - - clk_unprepare(pc->pwm_clk); - clk_unprepare(pc->cpt_clk); - - return 0; + return devm_pwmchip_add(dev, chip); } static const struct of_device_id sti_pwm_of_match[] = { @@ -701,7 +670,6 @@ static struct platform_driver sti_pwm_driver = { .of_match_table = sti_pwm_of_match, }, .probe = sti_pwm_probe, - .remove = sti_pwm_remove, }; module_platform_driver(sti_pwm_driver); diff --git a/drivers/regulator/bd71828-regulator.c b/drivers/regulator/bd71828-regulator.c index a4f09a5a30..d07f0d120c 100644 --- a/drivers/regulator/bd71828-regulator.c +++ b/drivers/regulator/bd71828-regulator.c @@ -207,14 +207,11 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { .suspend_reg = BD71828_REG_BUCK1_SUSP_VOLT, .suspend_mask = BD71828_MASK_BUCK1267_VOLT, .suspend_on_mask = BD71828_MASK_SUSP_EN, - .lpsr_on_mask = BD71828_MASK_LPSR_EN, /* * LPSR voltage is same as SUSPEND voltage. 
Allow - * setting it so that regulator can be set enabled at - * LPSR state + * only enabling/disabling regulator for LPSR state */ - .lpsr_reg = BD71828_REG_BUCK1_SUSP_VOLT, - .lpsr_mask = BD71828_MASK_BUCK1267_VOLT, + .lpsr_on_mask = BD71828_MASK_LPSR_EN, }, .reg_inits = buck1_inits, .reg_init_amnt = ARRAY_SIZE(buck1_inits), @@ -289,13 +286,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_BUCK3_VOLT, - .idle_reg = BD71828_REG_BUCK3_VOLT, - .suspend_reg = BD71828_REG_BUCK3_VOLT, - .lpsr_reg = BD71828_REG_BUCK3_VOLT, .run_mask = BD71828_MASK_BUCK3_VOLT, - .idle_mask = BD71828_MASK_BUCK3_VOLT, - .suspend_mask = BD71828_MASK_BUCK3_VOLT, - .lpsr_mask = BD71828_MASK_BUCK3_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, @@ -330,13 +321,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_BUCK4_VOLT, - .idle_reg = BD71828_REG_BUCK4_VOLT, - .suspend_reg = BD71828_REG_BUCK4_VOLT, - .lpsr_reg = BD71828_REG_BUCK4_VOLT, .run_mask = BD71828_MASK_BUCK4_VOLT, - .idle_mask = BD71828_MASK_BUCK4_VOLT, - .suspend_mask = BD71828_MASK_BUCK4_VOLT, - .lpsr_mask = BD71828_MASK_BUCK4_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, @@ -371,13 +356,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_BUCK5_VOLT, - .idle_reg = BD71828_REG_BUCK5_VOLT, - .suspend_reg = BD71828_REG_BUCK5_VOLT, - .lpsr_reg = BD71828_REG_BUCK5_VOLT, .run_mask = BD71828_MASK_BUCK5_VOLT, - .idle_mask = BD71828_MASK_BUCK5_VOLT, - .suspend_mask = BD71828_MASK_BUCK5_VOLT, - .lpsr_mask = BD71828_MASK_BUCK5_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = 
BD71828_MASK_LPSR_EN, @@ -494,13 +473,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_LDO1_VOLT, - .idle_reg = BD71828_REG_LDO1_VOLT, - .suspend_reg = BD71828_REG_LDO1_VOLT, - .lpsr_reg = BD71828_REG_LDO1_VOLT, .run_mask = BD71828_MASK_LDO_VOLT, - .idle_mask = BD71828_MASK_LDO_VOLT, - .suspend_mask = BD71828_MASK_LDO_VOLT, - .lpsr_mask = BD71828_MASK_LDO_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, @@ -534,13 +507,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_LDO2_VOLT, - .idle_reg = BD71828_REG_LDO2_VOLT, - .suspend_reg = BD71828_REG_LDO2_VOLT, - .lpsr_reg = BD71828_REG_LDO2_VOLT, .run_mask = BD71828_MASK_LDO_VOLT, - .idle_mask = BD71828_MASK_LDO_VOLT, - .suspend_mask = BD71828_MASK_LDO_VOLT, - .lpsr_mask = BD71828_MASK_LDO_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, @@ -574,13 +541,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_LDO3_VOLT, - .idle_reg = BD71828_REG_LDO3_VOLT, - .suspend_reg = BD71828_REG_LDO3_VOLT, - .lpsr_reg = BD71828_REG_LDO3_VOLT, .run_mask = BD71828_MASK_LDO_VOLT, - .idle_mask = BD71828_MASK_LDO_VOLT, - .suspend_mask = BD71828_MASK_LDO_VOLT, - .lpsr_mask = BD71828_MASK_LDO_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, @@ -615,13 +576,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_LDO4_VOLT, - .idle_reg = BD71828_REG_LDO4_VOLT, - .suspend_reg = BD71828_REG_LDO4_VOLT, - .lpsr_reg = BD71828_REG_LDO4_VOLT, .run_mask = BD71828_MASK_LDO_VOLT, - .idle_mask = 
BD71828_MASK_LDO_VOLT, - .suspend_mask = BD71828_MASK_LDO_VOLT, - .lpsr_mask = BD71828_MASK_LDO_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, @@ -656,13 +611,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { ROHM_DVS_LEVEL_SUSPEND | ROHM_DVS_LEVEL_LPSR, .run_reg = BD71828_REG_LDO5_VOLT, - .idle_reg = BD71828_REG_LDO5_VOLT, - .suspend_reg = BD71828_REG_LDO5_VOLT, - .lpsr_reg = BD71828_REG_LDO5_VOLT, .run_mask = BD71828_MASK_LDO_VOLT, - .idle_mask = BD71828_MASK_LDO_VOLT, - .suspend_mask = BD71828_MASK_LDO_VOLT, - .lpsr_mask = BD71828_MASK_LDO_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, @@ -721,9 +670,6 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { .suspend_reg = BD71828_REG_LDO7_VOLT, .lpsr_reg = BD71828_REG_LDO7_VOLT, .run_mask = BD71828_MASK_LDO_VOLT, - .idle_mask = BD71828_MASK_LDO_VOLT, - .suspend_mask = BD71828_MASK_LDO_VOLT, - .lpsr_mask = BD71828_MASK_LDO_VOLT, .idle_on_mask = BD71828_MASK_IDLE_EN, .suspend_on_mask = BD71828_MASK_SUSP_EN, .lpsr_on_mask = BD71828_MASK_LPSR_EN, diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 9b1f27f87c..d6febb9ec6 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1872,19 +1872,24 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, } } - if (err != -EEXIST) + if (err != -EEXIST) { regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs); - if (IS_ERR(regulator->debugfs)) - rdev_dbg(rdev, "Failed to create debugfs directory\n"); + if (IS_ERR(regulator->debugfs)) { + rdev_dbg(rdev, "Failed to create debugfs directory\n"); + regulator->debugfs = NULL; + } + } - debugfs_create_u32("uA_load", 0444, regulator->debugfs, - &regulator->uA_load); - debugfs_create_u32("min_uV", 0444, regulator->debugfs, - &regulator->voltage[PM_SUSPEND_ON].min_uV); - debugfs_create_u32("max_uV", 
0444, regulator->debugfs, - &regulator->voltage[PM_SUSPEND_ON].max_uV); - debugfs_create_file("constraint_flags", 0444, regulator->debugfs, - regulator, &constraint_flags_fops); + if (regulator->debugfs) { + debugfs_create_u32("uA_load", 0444, regulator->debugfs, + &regulator->uA_load); + debugfs_create_u32("min_uV", 0444, regulator->debugfs, + &regulator->voltage[PM_SUSPEND_ON].min_uV); + debugfs_create_u32("max_uV", 0444, regulator->debugfs, + &regulator->voltage[PM_SUSPEND_ON].max_uV); + debugfs_create_file("constraint_flags", 0444, regulator->debugfs, + regulator, &constraint_flags_fops); + } /* * Check now if the regulator is an always on regulator - if diff --git a/drivers/regulator/irq_helpers.c b/drivers/regulator/irq_helpers.c index 5227644355..a44a0b30a6 100644 --- a/drivers/regulator/irq_helpers.c +++ b/drivers/regulator/irq_helpers.c @@ -350,6 +350,9 @@ void *regulator_irq_helper(struct device *dev, h->irq = irq; h->desc = *d; + h->desc.name = devm_kstrdup(dev, d->name, GFP_KERNEL); + if (!h->desc.name) + return ERR_PTR(-ENOMEM); ret = init_rdev_state(dev, h, rdev, common_errs, per_rdev_errs, rdev_amount); diff --git a/drivers/regulator/mt6360-regulator.c b/drivers/regulator/mt6360-regulator.c index 4d34be94d1..fc464a4450 100644 --- a/drivers/regulator/mt6360-regulator.c +++ b/drivers/regulator/mt6360-regulator.c @@ -319,15 +319,15 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode) } } -#define MT6360_REGULATOR_DESC(_name, _sname, ereg, emask, vreg, vmask, \ - mreg, mmask, streg, stmask, vranges, \ - vcnts, offon_delay, irq_tbls) \ +#define MT6360_REGULATOR_DESC(match, _name, _sname, ereg, emask, vreg, \ + vmask, mreg, mmask, streg, stmask, \ + vranges, vcnts, offon_delay, irq_tbls) \ { \ .desc = { \ .name = #_name, \ .supply_name = #_sname, \ .id = MT6360_REGULATOR_##_name, \ - .of_match = of_match_ptr(#_name), \ + .of_match = of_match_ptr(match), \ .regulators_node = of_match_ptr("regulator"), \ .of_map_mode = mt6360_regulator_of_map_mode, 
\ .owner = THIS_MODULE, \ @@ -351,21 +351,29 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode) } static const struct mt6360_regulator_desc mt6360_regulator_descs[] = { - MT6360_REGULATOR_DESC(BUCK1, BUCK1_VIN, 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04, + MT6360_REGULATOR_DESC("buck1", BUCK1, BUCK1_VIN, + 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04, buck_vout_ranges, 256, 0, buck1_irq_tbls), - MT6360_REGULATOR_DESC(BUCK2, BUCK2_VIN, 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04, + MT6360_REGULATOR_DESC("buck2", BUCK2, BUCK2_VIN, + 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04, buck_vout_ranges, 256, 0, buck2_irq_tbls), - MT6360_REGULATOR_DESC(LDO6, LDO_VIN3, 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04, + MT6360_REGULATOR_DESC("ldo6", LDO6, LDO_VIN3, + 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04, ldo_vout_ranges1, 256, 0, ldo6_irq_tbls), - MT6360_REGULATOR_DESC(LDO7, LDO_VIN3, 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04, + MT6360_REGULATOR_DESC("ldo7", LDO7, LDO_VIN3, + 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04, ldo_vout_ranges1, 256, 0, ldo7_irq_tbls), - MT6360_REGULATOR_DESC(LDO1, LDO_VIN1, 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04, + MT6360_REGULATOR_DESC("ldo1", LDO1, LDO_VIN1, + 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04, ldo_vout_ranges2, 256, 0, ldo1_irq_tbls), - MT6360_REGULATOR_DESC(LDO2, LDO_VIN1, 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04, + MT6360_REGULATOR_DESC("ldo2", LDO2, LDO_VIN1, + 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04, ldo_vout_ranges2, 256, 0, ldo2_irq_tbls), - MT6360_REGULATOR_DESC(LDO3, LDO_VIN1, 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04, + MT6360_REGULATOR_DESC("ldo3", LDO3, LDO_VIN1, + 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04, ldo_vout_ranges2, 256, 100, ldo3_irq_tbls), - MT6360_REGULATOR_DESC(LDO5, LDO_VIN2, 0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04, + MT6360_REGULATOR_DESC("ldo5", LDO5, LDO_VIN2, + 
0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04, ldo_vout_ranges3, 128, 100, ldo5_irq_tbls), }; diff --git a/drivers/regulator/vqmmc-ipq4019-regulator.c b/drivers/regulator/vqmmc-ipq4019-regulator.c index 6d5ae25d08..e2a28788d8 100644 --- a/drivers/regulator/vqmmc-ipq4019-regulator.c +++ b/drivers/regulator/vqmmc-ipq4019-regulator.c @@ -86,6 +86,7 @@ static const struct of_device_id regulator_ipq4019_of_match[] = { { .compatible = "qcom,vqmmc-ipq4019-regulator", }, {}, }; +MODULE_DEVICE_TABLE(of, regulator_ipq4019_of_match); static struct platform_driver ipq4019_regulator_driver = { .probe = ipq4019_regulator_probe, diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index 9679cc2689..211c7e3b84 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -126,7 +126,7 @@ static int scp_elf_read_ipi_buf_addr(struct mtk_scp *scp, static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) { int ret; - size_t offset; + size_t buf_sz, offset; /* read the ipi buf addr from FW itself first */ ret = scp_elf_read_ipi_buf_addr(scp, fw, &offset); @@ -138,6 +138,14 @@ static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) } dev_info(scp->dev, "IPI buf addr %#010zx\n", offset); + /* Make sure IPI buffer fits in the L2TCM range assigned to this core */ + buf_sz = sizeof(*scp->recv_buf) + sizeof(*scp->send_buf); + + if (scp->sram_size < buf_sz + offset) { + dev_err(scp->dev, "IPI buffer does not fit in SRAM.\n"); + return -EOVERFLOW; + } + scp->recv_buf = (struct mtk_share_obj __iomem *) (scp->sram_base + offset); scp->send_buf = (struct mtk_share_obj __iomem *) diff --git a/drivers/s390/cio/cio_inject.c b/drivers/s390/cio/cio_inject.c index 8613fa9372..a2e771ebae 100644 --- a/drivers/s390/cio/cio_inject.c +++ b/drivers/s390/cio/cio_inject.c @@ -95,7 +95,7 @@ static ssize_t crw_inject_write(struct file *file, const char __user *buf, return -EINVAL; } - buffer = vmemdup_user(buf, lbuf); + buffer = 
memdup_user_nul(buf, lbuf); if (IS_ERR(buffer)) return -ENOMEM; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index a111154a90..c16f18cfee 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -360,10 +360,8 @@ int ccw_device_set_online(struct ccw_device *cdev) spin_lock_irq(cdev->ccwlock); ret = ccw_device_online(cdev); - spin_unlock_irq(cdev->ccwlock); - if (ret == 0) - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); - else { + if (ret) { + spin_unlock_irq(cdev->ccwlock); CIO_MSG_EVENT(0, "ccw_device_online returned %d, " "device 0.%x.%04x\n", ret, cdev->private->dev_id.ssid, @@ -372,7 +370,12 @@ int ccw_device_set_online(struct ccw_device *cdev) put_device(&cdev->dev); return ret; } - spin_lock_irq(cdev->ccwlock); + /* Wait until a final state is reached */ + while (!dev_fsm_final_state(cdev)) { + spin_unlock_irq(cdev->ccwlock); + wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); + spin_lock_irq(cdev->ccwlock); + } /* Check if online processing was successful */ if ((cdev->private->state != DEV_STATE_ONLINE) && (cdev->private->state != DEV_STATE_W4SENSE)) { diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 45e810c6ea..9c77ec3a8d 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -679,8 +679,8 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr, lgr_info_log(); } -static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, - int dstat) +static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, + int dstat, int dcc) { DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq"); @@ -688,15 +688,18 @@ static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, goto error; if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END)) goto error; + if (dcc == 1) + return -EAGAIN; if (!(dstat & DEV_STAT_DEV_END)) goto error; qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED); - return; + return 0; error: 
DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no); DBF_ERROR("ds: %2x cs:%2x", dstat, cstat); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + return -EIO; } /* qdio interrupt handler */ @@ -705,7 +708,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; - int cstat, dstat; + int cstat, dstat, rc, dcc; if (!intparm || !irq_ptr) { ccw_device_get_schid(cdev, &schid); @@ -725,10 +728,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, qdio_irq_check_sense(irq_ptr, irb); cstat = irb->scsw.cmd.cstat; dstat = irb->scsw.cmd.dstat; + dcc = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0; + rc = 0; switch (irq_ptr->state) { case QDIO_IRQ_STATE_INACTIVE: - qdio_establish_handle_irq(irq_ptr, cstat, dstat); + rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc); break; case QDIO_IRQ_STATE_CLEANUP: qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); @@ -742,12 +747,25 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, if (cstat || dstat) qdio_handle_activate_check(irq_ptr, intparm, cstat, dstat); + else if (dcc == 1) + rc = -EAGAIN; break; case QDIO_IRQ_STATE_STOPPED: break; default: WARN_ON_ONCE(1); } + + if (rc == -EAGAIN) { + DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry"); + rc = ccw_device_start(cdev, &irq_ptr->ccw, intparm, 0, 0); + if (!rc) + return; + DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no); + DBF_ERROR("rc:%4x", rc); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + } + wake_up(&cdev->private->wait_q); } diff --git a/drivers/s390/cio/trace.h b/drivers/s390/cio/trace.h index 86993de253..a4c5c6736b 100644 --- a/drivers/s390/cio/trace.h +++ b/drivers/s390/cio/trace.h @@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(s390_class_schib, __entry->devno = schib->pmcw.dev; __entry->schib = *schib; __entry->pmcw_ena = schib->pmcw.ena; - __entry->pmcw_st = schib->pmcw.ena; + __entry->pmcw_st = schib->pmcw.st; __entry->pmcw_dnv 
= schib->pmcw.dnv; __entry->pmcw_dev = schib->pmcw.dev; __entry->pmcw_lpm = schib->pmcw.lpm; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4968964ac5..1dd6dd2ed7 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1031,7 +1031,7 @@ static int hex2bitmap(const char *str, unsigned long *bitmap, int bits) */ static int modify_bitmap(const char *str, unsigned long *bitmap, int bits) { - int a, i, z; + unsigned long a, i, z; char *np, sign; /* bits needs to be a multiple of 8 */ diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 17b3f1ea3a..fb80b7593c 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -575,6 +575,7 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc, { if (!zq || !try_module_get(zq->queue->ap_dev.device.driver->owner)) return NULL; + zcrypt_card_get(zc); zcrypt_queue_get(zq); get_device(&zq->queue->ap_dev.device); atomic_add(weight, &zc->load); @@ -594,6 +595,7 @@ static inline void zcrypt_drop_queue(struct zcrypt_card *zc, atomic_sub(weight, &zq->load); put_device(&zq->queue->ap_dev.device); zcrypt_queue_put(zq); + zcrypt_card_put(zc); module_put(mod); } diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e8c3608798..71464e9ad4 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -545,7 +545,6 @@ static inline bool qeth_out_queue_is_empty(struct qeth_qdio_out_q *queue) struct qeth_qdio_info { atomic_t state; /* input */ - int no_in_queues; struct qeth_qdio_q *in_q; struct qeth_qdio_q *c_q; struct qeth_qdio_buffer_pool in_buf_pool; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 62e7576bff..5c69cba645 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -354,8 +354,8 @@ static int qeth_cq_init(struct qeth_card *card) qdio_reset_buffers(card->qdio.c_q->qdio_bufs, 
QDIO_MAX_BUFFERS_PER_Q); card->qdio.c_q->next_buf_to_init = 127; - rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, - card->qdio.no_in_queues - 1, 0, 127, NULL); + rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 1, 0, 127, + NULL); if (rc) { QETH_CARD_TEXT_(card, 2, "1err%d", rc); goto out; @@ -366,35 +366,33 @@ static int qeth_cq_init(struct qeth_card *card) return rc; } +static void qeth_free_cq(struct qeth_card *card) +{ + if (card->qdio.c_q) { + qeth_free_qdio_queue(card->qdio.c_q); + card->qdio.c_q = NULL; + } +} + static int qeth_alloc_cq(struct qeth_card *card) { if (card->options.cq == QETH_CQ_ENABLED) { QETH_CARD_TEXT(card, 2, "cqon"); - card->qdio.c_q = qeth_alloc_qdio_queue(); if (!card->qdio.c_q) { - dev_err(&card->gdev->dev, "Failed to create completion queue\n"); - return -ENOMEM; + card->qdio.c_q = qeth_alloc_qdio_queue(); + if (!card->qdio.c_q) { + dev_err(&card->gdev->dev, + "Failed to create completion queue\n"); + return -ENOMEM; + } } - - card->qdio.no_in_queues = 2; } else { QETH_CARD_TEXT(card, 2, "nocq"); - card->qdio.c_q = NULL; - card->qdio.no_in_queues = 1; + qeth_free_cq(card); } - QETH_CARD_TEXT_(card, 2, "iqc%d", card->qdio.no_in_queues); return 0; } -static void qeth_free_cq(struct qeth_card *card) -{ - if (card->qdio.c_q) { - --card->qdio.no_in_queues; - qeth_free_qdio_queue(card->qdio.c_q); - card->qdio.c_q = NULL; - } -} - static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, int delayed) { @@ -1142,6 +1140,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, } } +/** + * qeth_irq() - qeth interrupt handler + * @cdev: ccw device + * @intparm: expect pointer to iob + * @irb: Interruption Response Block + * + * In the good path: + * corresponding qeth channel is locked with last used iob as active_cmd. + * But this function is also called for error interrupts. 
+ * + * Caller ensures that: + * Interrupts are disabled; ccw device lock is held; + * + */ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { @@ -1183,11 +1195,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, iob = (struct qeth_cmd_buffer *) (addr_t)intparm; } - qeth_unlock_channel(card, channel); - rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); return; @@ -1231,6 +1242,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); @@ -1239,6 +1251,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } } + if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { + /* channel command hasn't started: retry. 
+ * active_cmd is still set to last iob + */ + QETH_CARD_TEXT(card, 2, "irqcc1"); + rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), + (addr_t)iob, 0, 0, iob->timeout); + if (rc) { + QETH_DBF_MESSAGE(2, + "ccw retry on %x failed, rc = %i\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, " err%d", rc); + qeth_unlock_channel(card, channel); + qeth_cancel_cmd(iob, rc); + } + return; + } + + qeth_unlock_channel(card, channel); + if (iob) { /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { @@ -1458,7 +1490,6 @@ static void qeth_init_qdio_info(struct qeth_card *card) card->qdio.default_out_queue = QETH_DEFAULT_QUEUE; /* inbound */ - card->qdio.no_in_queues = 1; card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT; if (IS_IQD(card)) card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_HSDEFAULT; @@ -2558,6 +2589,10 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "allcqdbf"); + /* completion */ + if (qeth_alloc_cq(card)) + goto out_err; + if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED, QETH_QDIO_ALLOCATED) != QETH_QDIO_UNINITIALIZED) return 0; @@ -2598,10 +2633,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT; } - /* completion */ - if (qeth_alloc_cq(card)) - goto out_freeoutq; - return 0; out_freeoutq: @@ -2615,6 +2646,8 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) card->qdio.in_q = NULL; out_nomem: atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED); + qeth_free_cq(card); +out_err: return -ENOMEM; } @@ -2622,11 +2655,12 @@ static void qeth_free_qdio_queues(struct qeth_card *card) { int i, j; + qeth_free_cq(card); + if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == QETH_QDIO_UNINITIALIZED) return; - qeth_free_cq(card); for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (card->qdio.in_q->bufs[j].rx_skb) dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb); @@ -3679,24 +3713,11 @@ static void qeth_qdio_poll(struct 
ccw_device *cdev, unsigned long card_ptr) int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) { - int rc; - - if (card->options.cq == QETH_CQ_NOTAVAILABLE) { - rc = -1; - goto out; - } else { - if (card->options.cq == cq) { - rc = 0; - goto out; - } - - qeth_free_qdio_queues(card); - card->options.cq = cq; - rc = 0; - } -out: - return rc; + if (card->options.cq == QETH_CQ_NOTAVAILABLE) + return -1; + card->options.cq = cq; + return 0; } EXPORT_SYMBOL_GPL(qeth_configure_cq); @@ -5139,6 +5160,7 @@ static int qeth_qdio_establish(struct qeth_card *card) struct qdio_buffer **in_sbal_ptrs[QETH_MAX_IN_QUEUES]; struct qeth_qib_parms *qib_parms = NULL; struct qdio_initialize init_data; + unsigned int no_input_qs = 1; unsigned int i; int rc = 0; @@ -5153,8 +5175,10 @@ static int qeth_qdio_establish(struct qeth_card *card) } in_sbal_ptrs[0] = card->qdio.in_q->qdio_bufs; - if (card->options.cq == QETH_CQ_ENABLED) + if (card->options.cq == QETH_CQ_ENABLED) { in_sbal_ptrs[1] = card->qdio.c_q->qdio_bufs; + no_input_qs++; + } for (i = 0; i < card->qdio.no_out_queues; i++) out_sbal_ptrs[i] = card->qdio.out_qs[i]->qdio_bufs; @@ -5164,7 +5188,7 @@ static int qeth_qdio_establish(struct qeth_card *card) QDIO_QETH_QFMT; init_data.qib_param_field_format = 0; init_data.qib_param_field = (void *)qib_parms; - init_data.no_input_qs = card->qdio.no_in_queues; + init_data.no_input_qs = no_input_qs; init_data.no_output_qs = card->qdio.no_out_queues; init_data.input_handler = qeth_qdio_input_handler; init_data.output_handler = qeth_qdio_output_handler; diff --git a/drivers/scsi/bfa/bfad_debugfs.c b/drivers/scsi/bfa/bfad_debugfs.c index fd1b378a26..d3c7d4423c 100644 --- a/drivers/scsi/bfa/bfad_debugfs.c +++ b/drivers/scsi/bfa/bfad_debugfs.c @@ -250,7 +250,7 @@ bfad_debugfs_write_regrd(struct file *file, const char __user *buf, unsigned long flags; void *kern_buf; - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return 
PTR_ERR(kern_buf); @@ -317,7 +317,7 @@ bfad_debugfs_write_regwr(struct file *file, const char __user *buf, unsigned long flags; void *kern_buf; - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index 9200b71808..5015d9b081 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -833,7 +833,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, BNX2FC_TGT_DBG(tgt, "Freeing up session resources\n"); - spin_lock_bh(&tgt->cq_lock); ctx_base_ptr = tgt->ctx_base; tgt->ctx_base = NULL; @@ -889,7 +888,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba, tgt->sq, tgt->sq_dma); tgt->sq = NULL; } - spin_unlock_bh(&tgt->cq_lock); if (ctx_base_ptr) iounmap(ctx_base_ptr); diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 1b285ce62f..4caee4e324 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -334,12 +334,13 @@ static void scsi_host_dev_release(struct device *dev) if (shost->shost_state == SHOST_CREATED) { /* - * Free the shost_dev device name here if scsi_host_alloc() - * and scsi_host_put() have been called but neither + * Free the shost_dev device name and remove the proc host dir + * here if scsi_host_{alloc,put}() have been called but neither * scsi_host_add() nor scsi_host_remove() has been called. * This avoids that the memory allocated for the shost_dev - * name is leaked. + * name as well as the proc dir structure are leaked. 
*/ + scsi_proc_hostdir_rm(shost->hostt); kfree(dev_name(&shost->shost_dev)); } diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 8aa5c22ae3..b54c8aa8e8 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5848,7 +5848,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) { struct Scsi_Host *sh; - sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info)); + sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info *)); if (sh == NULL) { dev_err(&h->pdev->dev, "scsi_host_alloc failed\n"); return -ENOMEM; diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 9ae3563113..ee6607dfcd 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -256,8 +256,7 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) /* help some expanders that fail to zero sas_address in the 'no * device' case */ - if (phy->attached_dev_type == SAS_PHY_UNUSED || - phy->linkrate < SAS_LINK_RATE_1_5_GBPS) + if (phy->attached_dev_type == SAS_PHY_UNUSED) memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); else memcpy(phy->attached_sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE); diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 65ac952b76..194825ff1e 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1341,7 +1341,6 @@ struct lpfc_hba { unsigned long bit_flags; #define FABRIC_COMANDS_BLOCKED 0 atomic_t num_rsrc_err; - atomic_t num_cmd_success; unsigned long last_rsrc_error_time; unsigned long last_ramp_down_time; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 2bd35a7424..9b862f1254 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -724,8 +724,10 @@ lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* Save the ELS cmd */ elsiocb->drvrTimeout = cmd; - lpfc_sli4_resume_rpi(ndlp, - 
lpfc_mbx_cmpl_resume_rpi, elsiocb); + if (lpfc_sli4_resume_rpi(ndlp, + lpfc_mbx_cmpl_resume_rpi, + elsiocb)) + kfree(elsiocb); goto out; } } diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 4e0c0b273e..2ff8ace6f7 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2539,9 +2539,9 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) /* No concern about the role change on the nvme remoteport. * The transport will update it. */ - spin_lock_irq(&vport->phba->hbalock); + spin_lock_irq(&ndlp->lock); ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT; - spin_unlock_irq(&vport->phba->hbalock); + spin_unlock_irq(&ndlp->lock); /* Don't let the host nvme transport keep sending keep-alives * on this remoteport. Vport is unloading, no recovery. The diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 6e3dd0b9bc..c58669c934 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) wqe = &nvmewqe->wqe; /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); + memset(wqe, 0, sizeof(*wqe)); ctx_buf->iocbq->context1 = NULL; spin_lock(&phba->sli4_hba.sgl_list_lock); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 6d1a3cbd6b..d9fb5e09fb 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -231,11 +231,10 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) struct Scsi_Host *shost; struct scsi_device *sdev; unsigned long new_queue_depth; - unsigned long num_rsrc_err, num_cmd_success; + unsigned long num_rsrc_err; int i; num_rsrc_err = atomic_read(&phba->num_rsrc_err); - num_cmd_success = atomic_read(&phba->num_cmd_success); /* * The error and success command counters are global per @@ -250,20 +249,16 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) for (i = 0; i <= phba->max_vports && vports[i] != NULL; 
i++) { shost = lpfc_shost_from_vport(vports[i]); shost_for_each_device(sdev, shost) { - new_queue_depth = - sdev->queue_depth * num_rsrc_err / - (num_rsrc_err + num_cmd_success); - if (!new_queue_depth) - new_queue_depth = sdev->queue_depth - 1; + if (num_rsrc_err >= sdev->queue_depth) + new_queue_depth = 1; else new_queue_depth = sdev->queue_depth - - new_queue_depth; + num_rsrc_err; scsi_change_queue_depth(sdev, new_queue_depth); } } lpfc_destroy_vport_work_array(phba, vports); atomic_set(&phba->num_rsrc_err, 0); - atomic_set(&phba->num_cmd_success, 0); } /** diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index da9a1f72d9..b1071226e2 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -651,10 +651,6 @@ lpfc_vport_delete(struct fc_vport *fc_vport) lpfc_free_sysfs_attr(vport); lpfc_debugfs_terminate(vport); - /* Remove FC host to break driver binding. */ - fc_remove_host(shost); - scsi_remove_host(shost); - /* Send the DA_ID and Fabric LOGO to cleanup Nameserver entries. */ ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) @@ -700,6 +696,10 @@ lpfc_vport_delete(struct fc_vport *fc_vport) skip_logo: + /* Remove FC host to break driver binding. */ + fc_remove_host(shost); + scsi_remove_host(shost); + lpfc_cleanup(vport); /* Remove scsi host now. The nodes are cleaned up. 
*/ diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index 386256369d..2a61cb7268 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrb_devstate_name(ldev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); } else { struct myrb_pdev_state *pdev_info = sdev->hostdata; @@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev, else name = myrb_devstate_name(pdev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->state); } return ret; @@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev, name = myrb_raidlevel_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } - return snprintf(buf, 32, "Physical Drive\n"); + return snprintf(buf, 64, "Physical Drive\n"); } static DEVICE_ATTR_RO(raid_level); @@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < myrb_logical_channel(sdev->host)) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); status = myrb_get_rbld_progress(cb, &rbld_buf); if (rbld_buf.ldev_num != sdev->id || status != MYRB_STATUS_SUCCESS) - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); - return snprintf(buf, 32, "rebuilding block %u of %u\n", + return snprintf(buf, 64, "rebuilding block %u of %u\n", rbld_buf.ldev_size - rbld_buf.blocks_left, 
rbld_buf.ldev_size); } diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index a4d244ee45..3f05f13fb1 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -949,9 +949,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrs_devstate_name(ldev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else { struct myrs_pdev_info *pdev_info; @@ -960,9 +960,9 @@ static ssize_t raid_state_show(struct device *dev, pdev_info = sdev->hostdata; name = myrs_devstate_name(pdev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->dev_state); } return ret; @@ -1068,13 +1068,13 @@ static ssize_t raid_level_show(struct device *dev, ldev_info = sdev->hostdata; name = myrs_raid_level_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else name = myrs_raid_level_name(MYRS_RAID_PHYSICAL); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } static DEVICE_ATTR_RO(raid_level); @@ -1088,7 +1088,7 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); ldev_info = sdev->hostdata; ldev_num = ldev_info->ldev_num; @@ -1100,11 +1100,11 @@ static ssize_t rebuild_show(struct device *dev, return -EIO; } if (ldev_info->rbld_active) { - return snprintf(buf, 32, "rebuilding block %zu of %zu\n", + return snprintf(buf, 64, "rebuilding block %zu of %zu\n", (size_t)ldev_info->rbld_lba, 
(size_t)ldev_info->cfg_devsize); } else - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); } static ssize_t rebuild_store(struct device *dev, @@ -1192,7 +1192,7 @@ static ssize_t consistency_check_show(struct device *dev, unsigned short ldev_num; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not checking\n"); + return snprintf(buf, 64, "physical device - not checking\n"); ldev_info = sdev->hostdata; if (!ldev_info) @@ -1200,11 +1200,11 @@ static ssize_t consistency_check_show(struct device *dev, ldev_num = ldev_info->ldev_num; myrs_get_ldev_info(cs, ldev_num, ldev_info); if (ldev_info->cc_active) - return snprintf(buf, 32, "checking block %zu of %zu\n", + return snprintf(buf, 64, "checking block %zu of %zu\n", (size_t)ldev_info->cc_lba, (size_t)ldev_info->cfg_devsize); else - return snprintf(buf, 32, "not checking\n"); + return snprintf(buf, 64, "not checking\n"); } static ssize_t consistency_check_store(struct device *dev, diff --git a/drivers/scsi/qedf/qedf_debugfs.c b/drivers/scsi/qedf/qedf_debugfs.c index 451fd236bf..96174353e3 100644 --- a/drivers/scsi/qedf/qedf_debugfs.c +++ b/drivers/scsi/qedf/qedf_debugfs.c @@ -170,7 +170,7 @@ qedf_dbg_debug_cmd_write(struct file *filp, const char __user *buffer, if (!count || *ppos) return 0; - kern_buf = memdup_user(buffer, count); + kern_buf = memdup_user_nul(buffer, count); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index fe0e8b23a8..6e2b809341 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2732,7 +2732,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport) return; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900c, + "%s pci offline detected (id 
%06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(fcport->vha); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } } @@ -2754,7 +2760,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { - qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + /* Will wait for wind down of adapter */ + ql_dbg(ql_dbg_aer, fcport->vha, 0x900b, + "%s pci offline detected (id %06x)\n", __func__, + fcport->d_id.b24); + qla_pci_set_eeh_busy(vha); qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET); return; diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 3c876967e8..87ada9b447 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -78,7 +78,7 @@ typedef union { #include "qla_nvme.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" -#define QLA2XXX_MANUFACTURER "QLogic Corporation" +#define QLA2XXX_MANUFACTURER "Marvell" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index af921fd150..73695c6815 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -274,7 +274,7 @@ qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused) seq_printf(s, "Driver: estimate iocb used [%d] high water limit [%d]\n", iocbs_used, ha->base_qpair->fwres.iocbs_limit); - seq_printf(s, "estimate exchange used[%d] high water limit [%d] n", + seq_printf(s, "estimate exchange used[%d] high water limit [%d]\n", exch_used, ha->base_qpair->fwres.exch_limit); if (ql2xenforce_iocb_limit == 2) { diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index 40a03f9c2d..ac702f74dd 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -1012,7 +1012,7 @@ 
qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job) list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { if (fcport->edif.enable) { - if (pcnt > app_req.num_ports) + if (pcnt >= app_req.num_ports) break; app_reply->elem[pcnt].rekey_count = diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 3861e41a8d..18376b50e3 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *); extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *); extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t); -extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool); +extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *); extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha, struct els_plogi *els_plogi); diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index d3742a83d2..b0f3bf42c3 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -1615,7 +1615,7 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries, eiter->type = cpu_to_be16(FDMI_HBA_MANUFACTURER); alen = scnprintf( eiter->a.manufacturer, sizeof(eiter->a.manufacturer), - "%s", "QLogic Corporation"); + "%s", QLA2XXX_MANUFACTURER); alen += FDMI_ATTR_ALIGNMENT(alen); alen += FDMI_ATTR_TYPELEN(eiter); eiter->len = cpu_to_be16(alen); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b59d5b560a..531e0ea872 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1188,8 +1188,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); + /* + * use qla24xx_async_gnl_sp_done to purge all pending gnl request. + * kref_put is call behind the scene. 
+ */ + sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR; + qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR); fcport->flags &= ~(FCF_ASYNC_SENT); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE); @@ -2667,6 +2671,40 @@ qla83xx_nic_core_fw_load(scsi_qla_host_t *vha) return rval; } +static void qla_enable_fce_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->fce) { + ha->flags.fce_enabled = 1; + memset(ha->fce, 0, fce_calc_size(ha->fce_bufs)); + rval = qla2x00_enable_fce_trace(vha, + ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8033, + "Unable to reinitialize FCE (%d).\n", rval); + ha->flags.fce_enabled = 0; + } + } +} + +static void qla_enable_eft_trace(scsi_qla_host_t *vha) +{ + int rval; + struct qla_hw_data *ha = vha->hw; + + if (ha->eft) { + memset(ha->eft, 0, EFT_SIZE); + rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS); + + if (rval) { + ql_log(ql_log_warn, vha, 0x8034, + "Unable to reinitialize EFT (%d).\n", rval); + } + } +} /* * qla2x00_initialize_adapter * Initialize board. 
@@ -3670,9 +3708,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha) } static void -qla2x00_init_fce_trace(scsi_qla_host_t *vha) +qla2x00_alloc_fce_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3701,27 +3738,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS, - ha->fce_mb, &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x00bf, - "Unable to initialize FCE (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c0, "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024); - ha->flags.fce_enabled = 1; ha->fce_dma = tc_dma; ha->fce = tc; + ha->fce_bufs = FCE_NUM_BUFFERS; } static void -qla2x00_init_eft_trace(scsi_qla_host_t *vha) +qla2x00_alloc_eft_trace(scsi_qla_host_t *vha) { - int rval; dma_addr_t tc_dma; void *tc; struct qla_hw_data *ha = vha->hw; @@ -3746,14 +3773,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) return; } - rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x00c2, - "Unable to initialize EFT (%d).\n", rval); - dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma); - return; - } - ql_dbg(ql_dbg_init, vha, 0x00c3, "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024); @@ -3761,13 +3780,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha) ha->eft = tc; } -static void -qla2x00_alloc_offload_mem(scsi_qla_host_t *vha) -{ - qla2x00_init_fce_trace(vha); - qla2x00_init_eft_trace(vha); -} - void qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) { @@ -3822,10 +3834,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); - qla2x00_init_fce_trace(vha); + qla2x00_alloc_fce_trace(vha); if (ha->fce) fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE; - qla2x00_init_eft_trace(vha); + qla2x00_alloc_eft_trace(vha); if (ha->eft) eft_size = EFT_SIZE; } @@ 
-4255,7 +4267,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) struct qla_hw_data *ha = vha->hw; struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; unsigned long flags; - uint16_t fw_major_version; int done_once = 0; if (IS_P3P_TYPE(ha)) { @@ -4322,7 +4333,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) goto failed; enable_82xx_npiv: - fw_major_version = ha->fw_major_version; if (IS_P3P_TYPE(ha)) qla82xx_check_md_needed(vha); else @@ -4351,12 +4361,11 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) if (rval != QLA_SUCCESS) goto failed; - if (!fw_major_version && !(IS_P3P_TYPE(ha))) - qla2x00_alloc_offload_mem(vha); - if (ql2xallocfwdump && !(IS_P3P_TYPE(ha))) qla2x00_alloc_fw_dump(vha); + qla_enable_fce_trace(vha); + qla_enable_eft_trace(vha); } else { goto failed; } @@ -5136,7 +5145,7 @@ qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len, if (use_tbl && ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC && index < QLA_MODEL_NAMES) - strlcpy(ha->model_desc, + strscpy(ha->model_desc, qla2x00_model_name[index * 2 + 1], sizeof(ha->model_desc)); } else { @@ -5144,14 +5153,14 @@ qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len, if (use_tbl && ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC && index < QLA_MODEL_NAMES) { - strlcpy(ha->model_number, + strscpy(ha->model_number, qla2x00_model_name[index * 2], sizeof(ha->model_number)); - strlcpy(ha->model_desc, + strscpy(ha->model_desc, qla2x00_model_name[index * 2 + 1], sizeof(ha->model_desc)); } else { - strlcpy(ha->model_number, def, + strscpy(ha->model_number, def, sizeof(ha->model_number)); } } @@ -7568,12 +7577,12 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) int qla2x00_abort_isp(scsi_qla_host_t *vha) { - int rval; uint8_t status = 0; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp, *tvp; struct req_que *req = ha->req_q_map[0]; unsigned long flags; + fc_port_t *fcport; if (vha->flags.online) { qla2x00_abort_isp_cleanup(vha); @@ -7639,6 +7648,15 @@ 
qla2x00_abort_isp(scsi_qla_host_t *vha) "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n"); return status; } + + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + if (!qla2x00_restart_isp(vha)) { clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags); @@ -7659,31 +7677,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) qla2x00_get_fw_version(vha); - if (ha->fce) { - ha->flags.fce_enabled = 1; - memset(ha->fce, 0, - fce_calc_size(ha->fce_bufs)); - rval = qla2x00_enable_fce_trace(vha, - ha->fce_dma, ha->fce_bufs, ha->fce_mb, - &ha->fce_bufs); - if (rval) { - ql_log(ql_log_warn, vha, 0x8033, - "Unable to reinitialize FCE " - "(%d).\n", rval); - ha->flags.fce_enabled = 0; - } - } - if (ha->eft) { - memset(ha->eft, 0, EFT_SIZE); - rval = qla2x00_enable_eft_trace(vha, - ha->eft_dma, EFT_NUM_BUFFERS); - if (rval) { - ql_log(ql_log_warn, vha, 0x8034, - "Unable to reinitialize EFT " - "(%d).\n", rval); - } - } } else { /* failed the ISP abort */ vha->flags.online = 1; if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { @@ -7733,6 +7727,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) atomic_inc(&vp->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); + /* User may have updated [fcp|nvme] prefer in flash */ + list_for_each_entry(fcport, &vp->vp_fcports, list) { + if (NVME_PRIORITY(ha, fcport)) + fcport->do_prli_nvme = 1; + else + fcport->do_prli_nvme = 0; + } + qla2x00_vp_abort_isp(vp); spin_lock_irqsave(&ha->vport_slock, flags); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 9e524d52dc..7bccd525ee 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2588,6 +2588,33 @@ void qla2x00_sp_release(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); + struct scsi_qla_host *vha = 
sp->vha; + + switch (sp->type) { + case SRB_CT_PTHRU_CMD: + /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */ + if (sp->u.iocb_cmd.u.ctarg.req && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp && + (!sp->fcport || + sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + break; + default: + break; + } sp->free(sp); } @@ -2611,7 +2638,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp) { struct srb_iocb *elsio = &sp->u.iocb_cmd; - kfree(sp->fcport); + if (sp->fcport) + qla2x00_free_fcport(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, @@ -2693,7 +2721,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { - kfree(fcport); + qla2x00_free_fcport(fcport); ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); return -ENOMEM; @@ -2724,6 +2752,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2748,6 +2777,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -3013,7 +3043,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, - fc_port_t *fcport, bool wait) + 
fc_port_t *fcport) { srb_t *sp; struct srb_iocb *elsio = NULL; @@ -3028,8 +3058,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!sp) { ql_log(ql_log_info, vha, 0x70e6, "SRB allocation failed\n"); - fcport->flags &= ~FCF_ASYNC_ACTIVE; - return -ENOMEM; + goto done; } fcport->flags |= FCF_ASYNC_SENT; @@ -3038,9 +3067,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - if (wait) - sp->flags = SRB_WAKEUP_ON_COMP; - sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; @@ -3056,7 +3082,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_plogi_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } resp_ptr = elsio->u.els_plogi.els_resp_pyld = @@ -3065,7 +3091,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_plogi.els_resp_pyld) { rval = QLA_FUNCTION_FAILED; - goto out; + goto done_free_sp; } ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr); @@ -3080,7 +3106,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) { struct fc_els_flogi *p = ptr; - p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC); } @@ -3089,10 +3114,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, sizeof(*elsio->u.els_plogi.els_plogi_pyld)); - init_completion(&elsio->u.els_plogi.comp); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_LOGIN_NEEDED; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + goto done_free_sp; } else { ql_dbg(ql_dbg_disc, vha, 0x3074, "%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n", @@ -3100,21 +3126,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, fcport->d_id.b24, vha->d_id.b24); } - if (wait) { - 
wait_for_completion(&elsio->u.els_plogi.comp); - - if (elsio->u.els_plogi.comp_status != CS_COMPLETE) - rval = QLA_FUNCTION_FAILED; - } else { - goto done; - } + return rval; -out: - fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); +done_free_sp: qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); done: + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); + qla2x00_set_fcport_disc_state(fcport, DSC_DELETED); return rval; } @@ -3916,7 +3936,7 @@ qla2x00_start_sp(srb_t *sp) return -EAGAIN; } - pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp); if (!pkt) { rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 511f31611a..d2ff54beb7 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || ha->flags.eeh_busy) { ql_log(ql_log_warn, vha, 0xd035, - "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index f726eb8449..083f94e43f 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -691,7 +691,7 @@ qlafx00_pci_info_str(struct scsi_qla_host *vha, char *str, size_t str_len) struct qla_hw_data *ha = vha->hw; if (pci_is_pcie(ha->pdev)) - strlcpy(str, "PCIe iSA", str_len); + strscpy(str, "PCIe iSA", str_len); return str; } @@ -1850,21 +1850,21 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) phost_info = &preg_hsi->hsi; memset(preg_hsi, 0, sizeof(struct register_host_info)); phost_info->os_type = OS_TYPE_LINUX; - strlcpy(phost_info->sysname, 
p_sysid->sysname, + strscpy(phost_info->sysname, p_sysid->sysname, sizeof(phost_info->sysname)); - strlcpy(phost_info->nodename, p_sysid->nodename, + strscpy(phost_info->nodename, p_sysid->nodename, sizeof(phost_info->nodename)); if (!strcmp(phost_info->nodename, "(none)")) ha->mr.host_info_resend = true; - strlcpy(phost_info->release, p_sysid->release, + strscpy(phost_info->release, p_sysid->release, sizeof(phost_info->release)); - strlcpy(phost_info->version, p_sysid->version, + strscpy(phost_info->version, p_sysid->version, sizeof(phost_info->version)); - strlcpy(phost_info->machine, p_sysid->machine, + strscpy(phost_info->machine, p_sysid->machine, sizeof(phost_info->machine)); - strlcpy(phost_info->domainname, p_sysid->domainname, + strscpy(phost_info->domainname, p_sysid->domainname, sizeof(phost_info->domainname)); - strlcpy(phost_info->hostdriver, QLA2XXX_VERSION, + strscpy(phost_info->hostdriver, QLA2XXX_VERSION, sizeof(phost_info->hostdriver)); preg_hsi->utc = (uint64_t)ktime_get_real_seconds(); ql_dbg(ql_dbg_init, vha, 0x0149, @@ -1909,9 +1909,9 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) if (fx_type == FXDISC_GET_CONFIG_INFO) { struct config_info_data *pinfo = (struct config_info_data *) fdisc->u.fxiocb.rsp_addr; - strlcpy(vha->hw->model_number, pinfo->model_num, + strscpy(vha->hw->model_number, pinfo->model_num, ARRAY_SIZE(vha->hw->model_number)); - strlcpy(vha->hw->model_desc, pinfo->model_description, + strscpy(vha->hw->model_desc, pinfo->model_description, ARRAY_SIZE(vha->hw->model_desc)); memcpy(&vha->hw->mr.symbolic_name, pinfo->symbolic_name, sizeof(vha->hw->mr.symbolic_name)); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index a2d3f8bb63..f418d43ee8 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5530,7 +5530,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) break; case QLA_EVT_ELS_PLOGI: qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI, - 
e->u.fcport.fcport, false); + e->u.fcport.fcport); break; case QLA_EVT_SA_REPLACE: rc = qla24xx_issue_sa_replace_iocb(vha, e); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ef46dce739..aac36e730a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1083,6 +1083,16 @@ void qlt_free_session_done(struct work_struct *work) "%s: sess %p logout completed\n", __func__, sess); } + /* check for any straggling io left behind */ + if (!(sess->flags & FCF_FCP2_DEVICE) && + qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x3027, + "IO not return. Resetting.\n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } + if (sess->logo_ack_needed) { sess->logo_ack_needed = 0; qla24xx_async_notify_ack(vha, sess, diff --git a/drivers/scsi/ufs/cdns-pltfrm.c b/drivers/scsi/ufs/cdns-pltfrm.c index 7da8be2f35..07a2580e4e 100644 --- a/drivers/scsi/ufs/cdns-pltfrm.c +++ b/drivers/scsi/ufs/cdns-pltfrm.c @@ -136,7 +136,7 @@ static int cdns_ufs_set_hclkdiv(struct ufs_hba *hba) * Make sure the register was updated, * UniPro layer will not work with an incorrect value. */ - mb(); + ufshcd_readl(hba, CDNS_UFS_REG_HCLKDIV); return 0; } diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index f810b99ef5..77a11cba69 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -242,8 +242,9 @@ static void ufs_qcom_select_unipro_mode(struct ufs_qcom_host *host) ufshcd_rmwl(host->hba, QUNIPRO_SEL, ufs_qcom_cap_qunipro(host) ? 
QUNIPRO_SEL : 0, REG_UFS_CFG1); - /* make sure above configuration is applied before we return */ - mb(); + + if (host->hw_ver.major >= 0x05) + ufshcd_rmwl(host->hba, QUNIPRO_G4_SEL, 0, REG_UFS_CFG0); } /* @@ -352,7 +353,7 @@ static void ufs_qcom_enable_hw_clk_gating(struct ufs_hba *hba) REG_UFS_CFG2); /* Ensure that HW clock gating is enabled before next operations */ - mb(); + ufshcd_readl(hba, REG_UFS_CFG2); } static int ufs_qcom_hce_enable_notify(struct ufs_hba *hba, @@ -449,7 +450,7 @@ static int ufs_qcom_cfg_timers(struct ufs_hba *hba, u32 gear, * make sure above write gets applied before we return from * this function. */ - mb(); + ufshcd_readl(hba, REG_UFS_SYS1CLK_1US); } if (ufs_qcom_cap_qunipro(host)) @@ -515,9 +516,9 @@ static int ufs_qcom_cfg_timers(struct ufs_hba *hba, u32 gear, mb(); } - if (update_link_startup_timer) { + if (update_link_startup_timer && host->hw_ver.major != 0x5) { ufshcd_writel(hba, ((core_clk_rate / MSEC_PER_SEC) * 100), - REG_UFS_PA_LINK_STARTUP_TIMER); + REG_UFS_CFG0); /* * make sure that this configuration is applied before * we return diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h index 8208e3a3ef..6b4584893c 100644 --- a/drivers/scsi/ufs/ufs-qcom.h +++ b/drivers/scsi/ufs/ufs-qcom.h @@ -35,8 +35,10 @@ enum { REG_UFS_TX_SYMBOL_CLK_NS_US = 0xC4, REG_UFS_LOCAL_PORT_ID_REG = 0xC8, REG_UFS_PA_ERR_CODE = 0xCC, - REG_UFS_RETRY_TIMER_REG = 0xD0, - REG_UFS_PA_LINK_STARTUP_TIMER = 0xD8, + /* On older UFS revisions, this register is called "RETRY_TIMER_REG" */ + REG_UFS_PARAM0 = 0xD0, + /* On older UFS revisions, this register is called "REG_UFS_PA_LINK_STARTUP_TIMER" */ + REG_UFS_CFG0 = 0xD8, REG_UFS_CFG1 = 0xDC, REG_UFS_CFG2 = 0xE0, REG_UFS_HW_VERSION = 0xE4, @@ -74,6 +76,9 @@ enum { #define UFS_CNTLR_2_x_x_VEN_REGS_OFFSET(x) (0x000 + x) #define UFS_CNTLR_3_x_x_VEN_REGS_OFFSET(x) (0x400 + x) +/* bit definitions for REG_UFS_CFG0 register */ +#define QUNIPRO_G4_SEL BIT(5) + /* bit definitions for REG_UFS_CFG1 
register */ #define QUNIPRO_SEL 0x1 #define UTP_DBG_RAMS_EN 0x20000 @@ -145,10 +150,10 @@ static inline void ufs_qcom_assert_reset(struct ufs_hba *hba) 1 << OFFSET_UFS_PHY_SOFT_RESET, REG_UFS_CFG1); /* - * Make sure assertion of ufs phy reset is written to - * register before returning + * Dummy read to ensure the write takes effect before doing any sort + * of delay */ - mb(); + ufshcd_readl(hba, REG_UFS_CFG1); } static inline void ufs_qcom_deassert_reset(struct ufs_hba *hba) @@ -157,10 +162,10 @@ static inline void ufs_qcom_deassert_reset(struct ufs_hba *hba) 0 << OFFSET_UFS_PHY_SOFT_RESET, REG_UFS_CFG1); /* - * Make sure de-assertion of ufs phy reset is written to - * register before returning + * Dummy read to ensure the write takes effect before doing any sort + * of delay */ - mb(); + ufshcd_readl(hba, REG_UFS_CFG1); } /* Host controller hardware version: major.minor.step */ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 03b33c34f7..b7abf1f641 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3960,7 +3960,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) * Make sure UIC command completion interrupt is disabled before * issuing UIC command. */ - wmb(); + ufshcd_readl(hba, REG_INTERRUPT_ENABLE); reenable_intr = true; } spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -9500,7 +9500,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) * Make sure that UFS interrupts are disabled and any pending interrupt * status is cleared before registering UFS interrupt handler. 
*/ - mb(); + ufshcd_readl(hba, REG_INTERRUPT_ENABLE); /* IRQ registration */ err = devm_request_irq(dev, irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba); diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c index 78480e332a..d1e3de8448 100644 --- a/drivers/slimbus/core.c +++ b/drivers/slimbus/core.c @@ -436,8 +436,8 @@ static int slim_device_alloc_laddr(struct slim_device *sbdev, if (ret < 0) goto err; } else if (report_present) { - ret = ida_simple_get(&ctrl->laddr_ida, - 0, SLIM_LA_MANAGER - 1, GFP_KERNEL); + ret = ida_alloc_max(&ctrl->laddr_ida, + SLIM_LA_MANAGER - 1, GFP_KERNEL); if (ret < 0) goto err; diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 21519ce05b..286997adb6 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1376,7 +1376,11 @@ static void qcom_slim_ngd_up_worker(struct work_struct *work) ctrl = container_of(work, struct qcom_slim_ngd_ctrl, ngd_up_work); /* Make sure qmi service is up before continuing */ - wait_for_completion_interruptible(&ctrl->qmi_up); + if (!wait_for_completion_interruptible_timeout(&ctrl->qmi_up, + msecs_to_jiffies(MSEC_PER_SEC))) { + dev_err(ctrl->dev, "QMI wait timeout\n"); + return; + } mutex_lock(&ctrl->ssr_lock); qcom_slim_ngd_enable(ctrl, true); diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index fde4edd83c..7e9074519a 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -991,7 +991,7 @@ struct qman_portal { /* linked-list of CSCN handlers. 
*/ struct list_head cgr_cbs; /* list lock */ - spinlock_t cgr_lock; + raw_spinlock_t cgr_lock; struct work_struct congestion_work; struct work_struct mr_work; char irqname[MAX_IRQNAME]; @@ -1281,7 +1281,7 @@ static int qman_create_portal(struct qman_portal *portal, /* if the given mask is NULL, assume all CGRs can be seen */ qman_cgrs_fill(&portal->cgrs[0]); INIT_LIST_HEAD(&portal->cgr_cbs); - spin_lock_init(&portal->cgr_lock); + raw_spin_lock_init(&portal->cgr_lock); INIT_WORK(&portal->congestion_work, qm_congestion_task); INIT_WORK(&portal->mr_work, qm_mr_process_task); portal->bits = 0; @@ -1456,11 +1456,14 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock(&p->cgr_lock); + /* + * FIXME: QM_MCR_TIMEOUT is 10ms, which is too long for a raw spinlock! + */ + raw_spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1479,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2443,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock(&p->cgr_lock); + raw_spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,19 +2480,14 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } EXPORT_SYMBOL(qman_create_cgr); -int 
qman_delete_cgr(struct qman_cgr *cgr) +static struct qman_portal *qman_cgr_get_affine_portal(struct qman_cgr *cgr) { - unsigned long irqflags; - struct qm_mcr_querycgr cgr_state; - struct qm_mcc_initcgr local_opts; - int ret = 0; - struct qman_cgr *i; struct qman_portal *p = get_affine_portal(); if (cgr->chan != p->config->channel) { @@ -2497,12 +2495,27 @@ int qman_delete_cgr(struct qman_cgr *cgr) dev_err(p->config->dev, "CGR not owned by current portal"); dev_dbg(p->config->dev, " create 0x%x, delete 0x%x\n", cgr->chan, p->config->channel); - - ret = -EINVAL; - goto put_portal; + put_affine_portal(); + return NULL; } + + return p; +} + +int qman_delete_cgr(struct qman_cgr *cgr) +{ + unsigned long irqflags; + struct qm_mcr_querycgr cgr_state; + struct qm_mcc_initcgr local_opts; + int ret = 0; + struct qman_cgr *i; + struct qman_portal *p = qman_cgr_get_affine_portal(cgr); + + if (!p) + return -EINVAL; + memset(&local_opts, 0, sizeof(struct qm_mcc_initcgr)); - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); list_del(&cgr->node); /* * If there are no other CGR objects for this CGRID in the list, @@ -2527,8 +2540,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) /* add back to the list */ list_add(&cgr->node, &p->cgr_cbs); release_lock: - spin_unlock_irqrestore(&p->cgr_lock, irqflags); -put_portal: + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } @@ -2559,6 +2571,54 @@ void qman_delete_cgr_safe(struct qman_cgr *cgr) } EXPORT_SYMBOL(qman_delete_cgr_safe); +static int qman_update_cgr(struct qman_cgr *cgr, struct qm_mcc_initcgr *opts) +{ + int ret; + unsigned long irqflags; + struct qman_portal *p = qman_cgr_get_affine_portal(cgr); + + if (!p) + return -EINVAL; + + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); + ret = qm_modify_cgr(cgr, 0, opts); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); + put_affine_portal(); + return ret; +} + +struct update_cgr_params { + struct qman_cgr 
*cgr; + struct qm_mcc_initcgr *opts; + int ret; +}; + +static void qman_update_cgr_smp_call(void *p) +{ + struct update_cgr_params *params = p; + + params->ret = qman_update_cgr(params->cgr, params->opts); +} + +int qman_update_cgr_safe(struct qman_cgr *cgr, struct qm_mcc_initcgr *opts) +{ + struct update_cgr_params params = { + .cgr = cgr, + .opts = opts, + }; + + preempt_disable(); + if (qman_cgr_cpus[cgr->cgrid] != smp_processor_id()) + smp_call_function_single(qman_cgr_cpus[cgr->cgrid], + qman_update_cgr_smp_call, ¶ms, + true); + else + params.ret = qman_update_cgr(cgr, opts); + preempt_enable(); + return params.ret; +} +EXPORT_SYMBOL(qman_update_cgr_safe); + /* Cleanup FQs */ static int _qm_mr_consume_and_match_verb(struct qm_portal *p, int v) diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c index 3c8e4212d9..40fb935818 100644 --- a/drivers/soc/mediatek/mtk-cmdq-helper.c +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c @@ -13,7 +13,8 @@ #define CMDQ_POLL_ENABLE_MASK BIT(0) #define CMDQ_EOC_IRQ_EN BIT(0) #define CMDQ_REG_TYPE 1 -#define CMDQ_JUMP_RELATIVE 1 +#define CMDQ_JUMP_RELATIVE 0 +#define CMDQ_JUMP_ABSOLUTE 1 struct cmdq_instruction { union { @@ -396,7 +397,7 @@ int cmdq_pkt_jump(struct cmdq_pkt *pkt, dma_addr_t addr) struct cmdq_instruction inst = {}; inst.op = CMDQ_CODE_JUMP; - inst.offset = CMDQ_JUMP_RELATIVE; + inst.offset = CMDQ_JUMP_ABSOLUTE; inst.value = addr >> cmdq_get_shift_pa(((struct cmdq_client *)pkt->cl)->chan); return cmdq_pkt_append_command(pkt, inst); diff --git a/drivers/soc/qcom/cmd-db.c b/drivers/soc/qcom/cmd-db.c index dd872017f3..b4803f2fde 100644 --- a/drivers/soc/qcom/cmd-db.c +++ b/drivers/soc/qcom/cmd-db.c @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2016-2018, 2020, The Linux Foundation. All rights reserved. */ +/* + * Copyright (c) 2016-2018, 2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. 
All rights reserved. + */ +#include #include #include #include @@ -17,6 +21,8 @@ #define MAX_SLV_ID 8 #define SLAVE_ID_MASK 0x7 #define SLAVE_ID_SHIFT 16 +#define SLAVE_ID(addr) FIELD_GET(GENMASK(19, 16), addr) +#define VRM_ADDR(addr) FIELD_GET(GENMASK(19, 4), addr) /** * struct entry_header: header for each entry in cmddb @@ -216,6 +222,30 @@ const void *cmd_db_read_aux_data(const char *id, size_t *len) } EXPORT_SYMBOL(cmd_db_read_aux_data); +/** + * cmd_db_match_resource_addr() - Compare if both Resource addresses are same + * + * @addr1: Resource address to compare + * @addr2: Resource address to compare + * + * Return: true if two addresses refer to the same resource, false otherwise + */ +bool cmd_db_match_resource_addr(u32 addr1, u32 addr2) +{ + /* + * Each RPMh VRM accelerator resource has 3 or 4 contiguous 4-byte + * aligned addresses associated with it. Ignore the offset to check + * for VRM requests. + */ + if (addr1 == addr2) + return true; + else if (SLAVE_ID(addr1) == CMD_DB_HW_VRM && VRM_ADDR(addr1) == VRM_ADDR(addr2)) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(cmd_db_match_resource_addr); + /** * cmd_db_read_slave_id - Get the slave ID for a given resource address * diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c index e749a2b285..b722e28d9e 100644 --- a/drivers/soc/qcom/rpmh-rsc.c +++ b/drivers/soc/qcom/rpmh-rsc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #define pr_fmt(fmt) "%s " fmt, KBUILD_MODNAME @@ -519,7 +520,7 @@ static int check_for_req_inflight(struct rsc_drv *drv, struct tcs_group *tcs, for_each_set_bit(j, &curr_enabled, MAX_CMDS_PER_TCS) { addr = read_tcs_cmd(drv, RSC_DRV_CMD_ADDR, i, j); for (k = 0; k < msg->num_cmds; k++) { - if (addr == msg->cmds[k].addr) + if (cmd_db_match_resource_addr(msg->cmds[k].addr, addr)) return -EBUSY; } } diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index 7b340f3832..fb37e14404 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -1830,7 +1830,7 @@ struct sdw_cdns_pdi *sdw_cdns_alloc_pdi(struct sdw_cdns *cdns, /* check if we found a PDI, else find in bi-directional */ if (!pdi) - pdi = cdns_find_pdi(cdns, 2, stream->num_bd, stream->bd, + pdi = cdns_find_pdi(cdns, 0, stream->num_bd, stream->bd, dai_id); if (pdi) { diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 525cc0143a..54730e93fb 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -151,8 +151,6 @@ static const struct debugfs_reg32 hisi_spi_regs[] = { HISI_SPI_DBGFS_REG("ENR", HISI_SPI_ENR), HISI_SPI_DBGFS_REG("FIFOC", HISI_SPI_FIFOC), HISI_SPI_DBGFS_REG("IMR", HISI_SPI_IMR), - HISI_SPI_DBGFS_REG("DIN", HISI_SPI_DIN), - HISI_SPI_DBGFS_REG("DOUT", HISI_SPI_DOUT), HISI_SPI_DBGFS_REG("SR", HISI_SPI_SR), HISI_SPI_DBGFS_REG("RISR", HISI_SPI_RISR), HISI_SPI_DBGFS_REG("ISR", HISI_SPI_ISR), diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 191baa6e45..e8d21c93ed 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -884,7 +884,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) mask |= STM32H7_SPI_SR_TXP | STM32H7_SPI_SR_RXP; if (!(sr & mask)) { - dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", + dev_vdbg(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", sr, ier); spin_unlock_irqrestore(&spi->lock, flags); return 
IRQ_NONE; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index d4b186a35b..128f1cda39 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1047,6 +1047,7 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) else rx_dev = ctlr->dev.parent; + ret = -ENOMSG; list_for_each_entry(xfer, &msg->transfers, transfer_list) { if (!ctlr->can_dma(ctlr, msg->spi, xfer)) continue; @@ -1070,6 +1071,9 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) } } } + /* No transfer has been mapped, bail out with success */ + if (ret) + return 0; ctlr->cur_msg_mapped = true; diff --git a/drivers/staging/greybus/arche-apb-ctrl.c b/drivers/staging/greybus/arche-apb-ctrl.c index bbf3ba744f..c7383c6c60 100644 --- a/drivers/staging/greybus/arche-apb-ctrl.c +++ b/drivers/staging/greybus/arche-apb-ctrl.c @@ -468,6 +468,7 @@ static const struct of_device_id arche_apb_ctrl_of_match[] = { { .compatible = "usbffff,2", }, { }, }; +MODULE_DEVICE_TABLE(of, arche_apb_ctrl_of_match); static struct platform_driver arche_apb_ctrl_device_driver = { .probe = arche_apb_ctrl_probe, diff --git a/drivers/staging/greybus/arche-platform.c b/drivers/staging/greybus/arche-platform.c index e374dfc0c9..00beb8bb1b 100644 --- a/drivers/staging/greybus/arche-platform.c +++ b/drivers/staging/greybus/arche-platform.c @@ -620,14 +620,7 @@ static const struct of_device_id arche_platform_of_match[] = { { .compatible = "google,arche-platform", }, { }, }; - -static const struct of_device_id arche_combined_id[] = { - /* Use PID/VID of SVC device */ - { .compatible = "google,arche-platform", }, - { .compatible = "usbffff,2", }, - { }, -}; -MODULE_DEVICE_TABLE(of, arche_combined_id); +MODULE_DEVICE_TABLE(of, arche_platform_of_match); static struct platform_driver arche_platform_device_driver = { .probe = arche_platform_probe, diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index c6bd86a533..9999f84016 100644 --- 
a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -147,6 +147,9 @@ static int __gb_lights_flash_brightness_set(struct gb_channel *channel) channel = get_channel_from_mode(channel->light, GB_CHANNEL_MODE_TORCH); + if (!channel) + return -EINVAL; + /* For not flash we need to convert brightness to intensity */ intensity = channel->intensity_uA.min + (channel->intensity_uA.step * channel->led->brightness); @@ -549,7 +552,10 @@ static int gb_lights_light_v4l2_register(struct gb_light *light) } channel_flash = get_channel_from_mode(light, GB_CHANNEL_MODE_FLASH); - WARN_ON(!channel_flash); + if (!channel_flash) { + dev_err(dev, "failed to get flash channel from mode\n"); + return -EINVAL; + } fled = &channel_flash->fled; diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c index ba25d0da8b..feaec4cd96 100644 --- a/drivers/staging/media/atomisp/pci/sh_css.c +++ b/drivers/staging/media/atomisp/pci/sh_css.c @@ -5322,6 +5322,7 @@ static int load_video_binaries(struct ia_css_pipe *pipe) sizeof(struct ia_css_binary), GFP_KERNEL); if (!mycs->yuv_scaler_binary) { + mycs->num_yuv_scaler = 0; err = -ENOMEM; return err; } diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c index ed091418f7..5110e95923 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c @@ -1068,6 +1068,11 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, struct imgu_media_pipe *imgu_pipe = &imgu->imgu_pipe[pipe]; /* Initialize subdev media entity */ + imgu_sd->subdev.entity.ops = &imgu_media_ops; + for (i = 0; i < IMGU_NODE_NUM; i++) { + imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? 
+ MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; + } r = media_entity_pads_init(&imgu_sd->subdev.entity, IMGU_NODE_NUM, imgu_sd->subdev_pads); if (r) { @@ -1075,11 +1080,6 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu, "failed initialize subdev media entity (%d)\n", r); return r; } - imgu_sd->subdev.entity.ops = &imgu_media_ops; - for (i = 0; i < IMGU_NODE_NUM; i++) { - imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ? - MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE; - } /* Initialize subdev */ v4l2_subdev_init(&imgu_sd->subdev, &imgu_subdev_ops); @@ -1174,15 +1174,15 @@ static int imgu_v4l2_node_setup(struct imgu_device *imgu, unsigned int pipe, } /* Initialize media entities */ + node->vdev_pad.flags = node->output ? + MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; + vdev->entity.ops = NULL; r = media_entity_pads_init(&vdev->entity, 1, &node->vdev_pad); if (r) { dev_err(dev, "failed initialize media entity (%d)\n", r); mutex_destroy(&node->lock); return r; } - node->vdev_pad.flags = node->output ? 
- MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK; - vdev->entity.ops = NULL; /* Initialize vbq */ vbq->type = node->vdev_fmt.type; diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c index 76d3f03999..168ee6a59d 100644 --- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c +++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c @@ -940,8 +940,9 @@ static int create_component(struct vchiq_mmal_instance *instance, /* build component create message */ m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE; m.u.component_create.client_component = component->client_component; - strncpy(m.u.component_create.name, name, - sizeof(m.u.component_create.name)); + strscpy_pad(m.u.component_create.name, name, + sizeof(m.u.component_create.name)); + m.u.component_create.pid = 0; ret = send_synchronous_mmal_msg(instance, &m, sizeof(m.u.component_create), diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 023bd4516a..30ce3451bc 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3566,6 +3566,8 @@ static int __init target_core_init_configfs(void) { struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; + struct cred *kern_cred; + const struct cred *old_cred; int ret; pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage" @@ -3642,11 +3644,21 @@ static int __init target_core_init_configfs(void) if (ret < 0) goto out; + /* We use the kernel credentials to access the target directory */ + kern_cred = prepare_kernel_cred(&init_task); + if (!kern_cred) { + ret = -ENOMEM; + goto out; + } + old_cred = override_creds(kern_cred); target_init_dbroot(); + revert_creds(old_cred); + put_cred(kern_cred); return 0; out: + target_xcopy_release_pt(); configfs_unregister_subsystem(subsys); core_dev_release_virtual_lun0(); rd_module_exit(); diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 
8957065ea7..8be1dfcc06 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func) if (rc) { pr_err("device registration failed, err: %d\n", rc); put_device(&optee_device->dev); + return rc; } if (func == PTA_CMD_GET_DEVICES_SUPP) device_create_file(&optee_device->dev, &dev_attr_need_supplicant); - return rc; + return 0; } static int __optee_enumerate_devices(u32 func) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index d38a80adec..5be79b5d78 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -199,7 +199,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd res = dfc->power_ops->get_real_power(df, power, freq, voltage); if (!res) { - state = dfc->capped_state; + state = dfc->max_state - dfc->capped_state; dfc->res_util = dfc->em_pd->table[state].power; dfc->res_util *= SCALE_ERROR_MITIGATION; diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c index cc94d8b005..36f0e92d92 100644 --- a/drivers/thermal/qcom/lmh.c +++ b/drivers/thermal/qcom/lmh.c @@ -92,6 +92,9 @@ static int lmh_probe(struct platform_device *pdev) int temp_low, temp_high, temp_arm, cpu_id, ret; u32 node_id; + if (!qcom_scm_is_available()) + return -EPROBE_DEFER; + lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL); if (!lmh_data) return -ENOMEM; diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c index 926cd8b411..2f31129cd5 100644 --- a/drivers/thermal/qcom/tsens.c +++ b/drivers/thermal/qcom/tsens.c @@ -84,7 +84,7 @@ void compute_intercept_slope(struct tsens_priv *priv, u32 *p1, for (i = 0; i < priv->num_sensors; i++) { dev_dbg(priv->dev, "%s: sensor%d - data_point1:%#x data_point2:%#x\n", - __func__, i, p1[i], p2[i]); + __func__, i, p1[i], p2 ? 
p2[i] : 0); if (!priv->sensor[i].slope) priv->sensor[i].slope = SLOPE_DEFAULT; diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 62a132b35b..fdfed54e62 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2669,22 +2669,29 @@ void tb_switch_unconfigure_link(struct tb_switch *sw) { struct tb_port *up, *down; - if (sw->is_unplugged) - return; if (!tb_route(sw) || tb_switch_is_icm(sw)) return; + /* + * Unconfigure downstream port so that wake-on-connect can be + * configured after router unplug. No need to unconfigure upstream port + * since its router is unplugged. + */ up = tb_upstream_port(sw); - if (tb_switch_is_usb4(up->sw)) - usb4_port_unconfigure(up); - else - tb_lc_unconfigure_port(up); - down = up->remote; if (tb_switch_is_usb4(down->sw)) usb4_port_unconfigure(down); else tb_lc_unconfigure_port(down); + + if (sw->is_unplugged) + return; + + up = tb_upstream_port(sw); + if (tb_switch_is_usb4(up->sw)) + usb4_port_unconfigure(up); + else + tb_lc_unconfigure_port(up); } static void tb_switch_credits_init(struct tb_switch *sw) @@ -2926,7 +2933,26 @@ static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags) return tb_lc_set_wake(sw, flags); } -int tb_switch_resume(struct tb_switch *sw) +static void tb_switch_check_wakes(struct tb_switch *sw) +{ + if (device_may_wakeup(&sw->dev)) { + if (tb_switch_is_usb4(sw)) + usb4_switch_check_wakes(sw); + } +} + +/** + * tb_switch_resume() - Resume a switch after sleep + * @sw: Switch to resume + * @runtime: Is this resume from runtime suspend or system sleep + * + * Resumes and re-enumerates router (and all its children), if still plugged + * after suspend. Don't enumerate device router whose UID was changed during + * suspend. If this is resume from system sleep, notifies PM core about the + * wakes occurred during suspend. Disables all wakes, except USB4 wake of + * upstream port for USB4 routers that shall be always enabled. 
+ */ +int tb_switch_resume(struct tb_switch *sw, bool runtime) { struct tb_port *port; int err; @@ -2971,6 +2997,9 @@ int tb_switch_resume(struct tb_switch *sw) if (err) return err; + if (!runtime) + tb_switch_check_wakes(sw); + /* Disable wakes */ tb_switch_set_wake(sw, 0); @@ -3000,7 +3029,8 @@ int tb_switch_resume(struct tb_switch *sw) */ if (tb_port_unlock(port)) tb_port_warn(port, "failed to unlock port\n"); - if (port->remote && tb_switch_resume(port->remote->sw)) { + if (port->remote && + tb_switch_resume(port->remote->sw, runtime)) { tb_port_warn(port, "lost during suspend, disconnecting\n"); tb_sw_set_unplugged(port->remote->sw); diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 0c3e1d14cd..8bf45da101 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1491,7 +1491,7 @@ static int tb_resume_noirq(struct tb *tb) /* remove any pci devices the firmware might have setup */ tb_switch_reset(tb->root_switch); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, false); tb_free_invalid_tunnels(tb); tb_free_unplugged_children(tb->root_switch); tb_restore_children(tb->root_switch); @@ -1617,7 +1617,7 @@ static int tb_runtime_resume(struct tb *tb) struct tb_tunnel *tunnel, *n; mutex_lock(&tb->lock); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, true); tb_free_invalid_tunnels(tb); tb_restore_children(tb->root_switch); list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index db0d3d3777..a739cbf2be 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -740,7 +740,7 @@ int tb_switch_configure(struct tb_switch *sw); int tb_switch_add(struct tb_switch *sw); void tb_switch_remove(struct tb_switch *sw); void tb_switch_suspend(struct tb_switch *sw, bool runtime); -int tb_switch_resume(struct tb_switch *sw); +int tb_switch_resume(struct tb_switch *sw, bool runtime); int tb_switch_reset(struct 
tb_switch *sw); void tb_sw_set_unplugged(struct tb_switch *sw); struct tb_port *tb_switch_find_port(struct tb_switch *sw, @@ -1043,6 +1043,7 @@ static inline struct tb_retimer *tb_to_retimer(struct device *dev) return NULL; } +void usb4_switch_check_wakes(struct tb_switch *sw); int usb4_switch_setup(struct tb_switch *sw); int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid); int usb4_switch_drom_read(struct tb_switch *sw, unsigned int address, void *buf, diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 36547afa18..44eb9c6588 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -175,15 +175,18 @@ static inline int usb4_switch_op_data(struct tb_switch *sw, u16 opcode, tx_dwords, rx_data, rx_dwords); } -static void usb4_switch_check_wakes(struct tb_switch *sw) +/** + * usb4_switch_check_wakes() - Check for wakes and notify PM core about them + * @sw: Router whose wakes to check + * + * Checks wakes occurred during suspend and notify the PM core about them. 
+ */ +void usb4_switch_check_wakes(struct tb_switch *sw) { struct tb_port *port; bool wakeup = false; u32 val; - if (!device_may_wakeup(&sw->dev)) - return; - if (tb_route(sw)) { if (tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_6, 1)) return; @@ -248,8 +251,6 @@ int usb4_switch_setup(struct tb_switch *sw) u32 val = 0; int ret; - usb4_switch_check_wakes(sw); - if (!tb_route(sw)) return 0; diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 2337402f95..aae9f73585 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -178,16 +178,18 @@ struct gsm_control { enum gsm_mux_state { GSM_SEARCH, - GSM_START, - GSM_ADDRESS, - GSM_CONTROL, - GSM_LEN, - GSM_DATA, - GSM_FCS, - GSM_OVERRUN, - GSM_LEN0, - GSM_LEN1, - GSM_SSOF, + GSM0_ADDRESS, + GSM0_CONTROL, + GSM0_LEN0, + GSM0_LEN1, + GSM0_DATA, + GSM0_FCS, + GSM0_SSOF, + GSM1_START, + GSM1_ADDRESS, + GSM1_CONTROL, + GSM1_DATA, + GSM1_OVERRUN, }; /* @@ -2162,6 +2164,30 @@ static void gsm_queue(struct gsm_mux *gsm) return; } +/** + * gsm0_receive_state_check_and_fix - check and correct receive state + * @gsm: gsm data for this ldisc instance + * + * Ensures that the current receive state is valid for basic option mode. 
+ */ + +static void gsm0_receive_state_check_and_fix(struct gsm_mux *gsm) +{ + switch (gsm->state) { + case GSM_SEARCH: + case GSM0_ADDRESS: + case GSM0_CONTROL: + case GSM0_LEN0: + case GSM0_LEN1: + case GSM0_DATA: + case GSM0_FCS: + case GSM0_SSOF: + break; + default: + gsm->state = GSM_SEARCH; + break; + } +} /** * gsm0_receive - perform processing for non-transparency @@ -2175,26 +2201,27 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) { unsigned int len; + gsm0_receive_state_check_and_fix(gsm); switch (gsm->state) { case GSM_SEARCH: /* SOF marker */ if (c == GSM0_SOF) { - gsm->state = GSM_ADDRESS; + gsm->state = GSM0_ADDRESS; gsm->address = 0; gsm->len = 0; gsm->fcs = INIT_FCS; } break; - case GSM_ADDRESS: /* Address EA */ + case GSM0_ADDRESS: /* Address EA */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->address, c)) - gsm->state = GSM_CONTROL; + gsm->state = GSM0_CONTROL; break; - case GSM_CONTROL: /* Control Byte */ + case GSM0_CONTROL: /* Control Byte */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->control = c; - gsm->state = GSM_LEN0; + gsm->state = GSM0_LEN0; break; - case GSM_LEN0: /* Length EA */ + case GSM0_LEN0: /* Length EA */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->len, c)) { if (gsm->len > gsm->mru) { @@ -2204,14 +2231,14 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) } gsm->count = 0; if (!gsm->len) - gsm->state = GSM_FCS; + gsm->state = GSM0_FCS; else - gsm->state = GSM_DATA; + gsm->state = GSM0_DATA; break; } - gsm->state = GSM_LEN1; + gsm->state = GSM0_LEN1; break; - case GSM_LEN1: + case GSM0_LEN1: gsm->fcs = gsm_fcs_add(gsm->fcs, c); len = c; gsm->len |= len << 7; @@ -2222,26 +2249,29 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) } gsm->count = 0; if (!gsm->len) - gsm->state = GSM_FCS; + gsm->state = GSM0_FCS; else - gsm->state = GSM_DATA; + gsm->state = GSM0_DATA; break; - case GSM_DATA: /* Data */ + case GSM0_DATA: /* Data */ gsm->buf[gsm->count++] = c; - 
if (gsm->count == gsm->len) { + if (gsm->count >= MAX_MRU) { + gsm->bad_size++; + gsm->state = GSM_SEARCH; + } else if (gsm->count >= gsm->len) { /* Calculate final FCS for UI frames over all data */ if ((gsm->control & ~PF) != UIH) { gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->count); } - gsm->state = GSM_FCS; + gsm->state = GSM0_FCS; } break; - case GSM_FCS: /* FCS follows the packet */ + case GSM0_FCS: /* FCS follows the packet */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); - gsm->state = GSM_SSOF; + gsm->state = GSM0_SSOF; break; - case GSM_SSOF: + case GSM0_SSOF: gsm->state = GSM_SEARCH; if (c == GSM0_SOF) gsm_queue(gsm); @@ -2254,6 +2284,29 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) } } +/** + * gsm1_receive_state_check_and_fix - check and correct receive state + * @gsm: gsm data for this ldisc instance + * + * Ensures that the current receive state is valid for advanced option mode. + */ + +static void gsm1_receive_state_check_and_fix(struct gsm_mux *gsm) +{ + switch (gsm->state) { + case GSM_SEARCH: + case GSM1_START: + case GSM1_ADDRESS: + case GSM1_CONTROL: + case GSM1_DATA: + case GSM1_OVERRUN: + break; + default: + gsm->state = GSM_SEARCH; + break; + } +} + /** * gsm1_receive - perform processing for non-transparency * @gsm: gsm data for this ldisc instance @@ -2264,6 +2317,7 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) { + gsm1_receive_state_check_and_fix(gsm); /* handle XON/XOFF */ if ((c & ISO_IEC_646_MASK) == XON) { gsm->constipated = true; @@ -2276,11 +2330,11 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) } if (c == GSM1_SOF) { /* EOF is only valid in frame if we have got to the data state */ - if (gsm->state == GSM_DATA) { + if (gsm->state == GSM1_DATA) { if (gsm->count < 1) { /* Missing FSC */ gsm->malformed++; - gsm->state = GSM_START; + gsm->state = GSM1_START; return; } /* Remove the FCS from data */ @@ -2296,14 
+2350,14 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]); gsm->len = gsm->count; gsm_queue(gsm); - gsm->state = GSM_START; + gsm->state = GSM1_START; return; } /* Any partial frame was a runt so go back to start */ - if (gsm->state != GSM_START) { + if (gsm->state != GSM1_START) { if (gsm->state != GSM_SEARCH) gsm->malformed++; - gsm->state = GSM_START; + gsm->state = GSM1_START; } /* A SOF in GSM_START means we are still reading idling or framing bytes */ @@ -2324,30 +2378,30 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) gsm->escape = false; } switch (gsm->state) { - case GSM_START: /* First byte after SOF */ + case GSM1_START: /* First byte after SOF */ gsm->address = 0; - gsm->state = GSM_ADDRESS; + gsm->state = GSM1_ADDRESS; gsm->fcs = INIT_FCS; fallthrough; - case GSM_ADDRESS: /* Address continuation */ + case GSM1_ADDRESS: /* Address continuation */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->address, c)) - gsm->state = GSM_CONTROL; + gsm->state = GSM1_CONTROL; break; - case GSM_CONTROL: /* Control Byte */ + case GSM1_CONTROL: /* Control Byte */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->control = c; gsm->count = 0; - gsm->state = GSM_DATA; + gsm->state = GSM1_DATA; break; - case GSM_DATA: /* Data */ - if (gsm->count > gsm->mru) { /* Allow one for the FCS */ - gsm->state = GSM_OVERRUN; + case GSM1_DATA: /* Data */ + if (gsm->count > gsm->mru || gsm->count > MAX_MRU) { /* Allow one for the FCS */ + gsm->state = GSM1_OVERRUN; gsm->bad_size++; } else gsm->buf[gsm->count++] = c; break; - case GSM_OVERRUN: /* Over-long - eg a dropped SOF */ + case GSM1_OVERRUN: /* Over-long - eg a dropped SOF */ break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); @@ -2827,6 +2881,9 @@ static int gsmld_open(struct tty_struct *tty) { struct gsm_mux *gsm; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (tty->ops->write == NULL) return -EINVAL; diff 
--git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c index f95047160b..8a32418feb 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -674,18 +674,46 @@ static void init_real_clk_rates(struct device *dev, struct brcmuart_priv *priv) clk_set_rate(priv->baud_mux_clk, priv->default_mux_rate); } +static u32 find_quot(struct device *dev, u32 freq, u32 baud, u32 *percent) +{ + u32 quot; + u32 rate; + u64 hires_rate; + u64 hires_baud; + u64 hires_err; + + rate = freq / 16; + quot = DIV_ROUND_CLOSEST(rate, baud); + if (!quot) + return 0; + + /* increase resolution to get xx.xx percent */ + hires_rate = div_u64((u64)rate * 10000, (u64)quot); + hires_baud = (u64)baud * 10000; + + /* get the delta */ + if (hires_rate > hires_baud) + hires_err = (hires_rate - hires_baud); + else + hires_err = (hires_baud - hires_rate); + + *percent = (unsigned long)DIV_ROUND_CLOSEST_ULL(hires_err, baud); + + dev_dbg(dev, "Baud rate: %u, MUX Clk: %u, Error: %u.%u%%\n", + baud, freq, *percent / 100, *percent % 100); + + return quot; +} + static void set_clock_mux(struct uart_port *up, struct brcmuart_priv *priv, u32 baud) { u32 percent; u32 best_percent = UINT_MAX; u32 quot; + u32 freq; u32 best_quot = 1; - u32 rate; - int best_index = -1; - u64 hires_rate; - u64 hires_baud; - u64 hires_err; + u32 best_freq = 0; int rc; int i; int real_baud; @@ -694,44 +722,35 @@ static void set_clock_mux(struct uart_port *up, struct brcmuart_priv *priv, if (priv->baud_mux_clk == NULL) return; - /* Find the closest match for specified baud */ - for (i = 0; i < ARRAY_SIZE(priv->real_rates); i++) { - if (priv->real_rates[i] == 0) - continue; - rate = priv->real_rates[i] / 16; - quot = DIV_ROUND_CLOSEST(rate, baud); - if (!quot) - continue; - - /* increase resolution to get xx.xx percent */ - hires_rate = (u64)rate * 10000; - hires_baud = (u64)baud * 10000; - - hires_err = div_u64(hires_rate, (u64)quot); - - /* get the delta */ - if 
(hires_err > hires_baud) - hires_err = (hires_err - hires_baud); - else - hires_err = (hires_baud - hires_err); - - percent = (unsigned long)DIV_ROUND_CLOSEST_ULL(hires_err, baud); - dev_dbg(up->dev, - "Baud rate: %u, MUX Clk: %u, Error: %u.%u%%\n", - baud, priv->real_rates[i], percent / 100, - percent % 100); - if (percent < best_percent) { - best_percent = percent; - best_index = i; - best_quot = quot; + /* Try default_mux_rate first */ + quot = find_quot(up->dev, priv->default_mux_rate, baud, &percent); + if (quot) { + best_percent = percent; + best_freq = priv->default_mux_rate; + best_quot = quot; + } + /* If more than 1% error, find the closest match for specified baud */ + if (best_percent > 100) { + for (i = 0; i < ARRAY_SIZE(priv->real_rates); i++) { + freq = priv->real_rates[i]; + if (freq == 0 || freq == priv->default_mux_rate) + continue; + quot = find_quot(up->dev, freq, baud, &percent); + if (!quot) + continue; + + if (percent < best_percent) { + best_percent = percent; + best_freq = freq; + best_quot = quot; + } } } - if (best_index == -1) { + if (!best_freq) { dev_err(up->dev, "Error, %d BAUD rate is too fast.\n", baud); return; } - rate = priv->real_rates[best_index]; - rc = clk_set_rate(priv->baud_mux_clk, rate); + rc = clk_set_rate(priv->baud_mux_clk, best_freq); if (rc) dev_err(up->dev, "Error selecting BAUD MUX clock\n"); @@ -740,8 +759,8 @@ static void set_clock_mux(struct uart_port *up, struct brcmuart_priv *priv, dev_err(up->dev, "Error, baud: %d has %u.%u%% error\n", baud, percent / 100, percent % 100); - real_baud = rate / 16 / best_quot; - dev_dbg(up->dev, "Selecting BAUD MUX rate: %u\n", rate); + real_baud = best_freq / 16 / best_quot; + dev_dbg(up->dev, "Selecting BAUD MUX rate: %u\n", best_freq); dev_dbg(up->dev, "Requested baud: %u, Actual baud: %u\n", baud, real_baud); @@ -750,7 +769,7 @@ static void set_clock_mux(struct uart_port *up, struct brcmuart_priv *priv, i += (i / 2); priv->char_wait = ns_to_ktime(i); - up->uartclk = rate; + 
up->uartclk = best_freq; } static void brcmstb_set_termios(struct uart_port *up, diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 4fce318bc8..2d595a646b 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1366,9 +1366,6 @@ static void autoconfig_irq(struct uart_8250_port *up) inb_p(ICP); } - if (uart_console(port)) - console_lock(); - /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); save_mcr = serial8250_in_MCR(up); @@ -1399,9 +1396,6 @@ static void autoconfig_irq(struct uart_8250_port *up) if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); - if (uart_console(port)) - console_unlock(); - port->irq = (irq > 0) ? irq : 0; } diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index e0d576b88d..9f1be9ce47 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2237,9 +2237,12 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, lpuart32_write(&sport->port, bd, UARTBAUD); lpuart32_serial_setbrg(sport, baud); - lpuart32_write(&sport->port, modem, UARTMODIR); - lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* disable CTS before enabling UARTCTRL_TE to avoid pending idle preamble */ + lpuart32_write(&sport->port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); /* restore control register */ + lpuart32_write(&sport->port, ctrl, UARTCTRL); + /* re-enable the CTS if needed */ + lpuart32_write(&sport->port, modem, UARTMODIR); if (old && sport->lpuart_dma_rx_use) { if (!lpuart_start_rx_dma(sport)) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 4504b5fcc1..0587beaaea 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -491,8 +491,7 @@ static void imx_uart_stop_tx(struct uart_port *port) } } -/* called with port.lock taken and irqs off */ -static void imx_uart_stop_rx(struct uart_port *port) +static void 
imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback) { struct imx_port *sport = (struct imx_port *)port; u32 ucr1, ucr2, ucr4, uts; @@ -514,7 +513,7 @@ static void imx_uart_stop_rx(struct uart_port *port) /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ if (port->rs485.flags & SER_RS485_ENABLED && port->rs485.flags & SER_RS485_RTS_ON_SEND && - sport->have_rtscts && !sport->have_rtsgpio) { + sport->have_rtscts && !sport->have_rtsgpio && loopback) { uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); uts |= UTS_LOOP; imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); @@ -526,6 +525,16 @@ static void imx_uart_stop_rx(struct uart_port *port) imx_uart_writel(sport, ucr2, UCR2); } +/* called with port.lock taken and irqs off */ +static void imx_uart_stop_rx(struct uart_port *port) +{ + /* + * Stop RX and enable loopback in order to make sure RS485 bus + * is not blocked. Se comment in imx_uart_probe(). + */ + imx_uart_stop_rx_with_loopback_ctrl(port, true); +} + /* called with port.lock taken and irqs off */ static void imx_uart_enable_ms(struct uart_port *port) { @@ -714,8 +723,13 @@ static void imx_uart_start_tx(struct uart_port *port) imx_uart_rts_inactive(sport, &ucr2); imx_uart_writel(sport, ucr2, UCR2); + /* + * Since we are about to transmit we can not stop RX + * with loopback enabled because that will make our + * transmitted data being just looped to RX. 
+ */ if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - imx_uart_stop_rx(port); + imx_uart_stop_rx_with_loopback_ctrl(port, false); sport->tx_state = WAIT_AFTER_RTS; start_hrtimer_ms(&sport->trigger_start_tx, diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 79b7db8580..d988511f8b 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,25 @@ static struct kgdb_io kgdboc_earlycon_io_ops; static int (*earlycon_orig_exit)(struct console *con); #endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */ +/* + * When we leave the debug trap handler we need to reset the keyboard status + * (since the original keyboard state gets partially clobbered by kdb use of + * the keyboard). + * + * The path to deliver the reset is somewhat circuitous. + * + * To deliver the reset we register an input handler, reset the keyboard and + * then deregister the input handler. However, to get this done right, we do + * have to carefully manage the calling context because we can only register + * input handlers from task context. + * + * In particular we need to trigger the action from the debug trap handler with + * all its NMI and/or NMI-like oddities. To solve this the kgdboc trap exit code + * (the "post_exception" callback) uses irq_work_queue(), which is NMI-safe, to + * schedule a callback from a hardirq context. From there we have to defer the + * work again, this time using schedule_work(), to get a callback using the + * system workqueue, which runs in task context. 
+ */ #ifdef CONFIG_KDB_KEYBOARD static int kgdboc_reset_connect(struct input_handler *handler, struct input_dev *dev, @@ -99,10 +119,17 @@ static void kgdboc_restore_input_helper(struct work_struct *dummy) static DECLARE_WORK(kgdboc_restore_input_work, kgdboc_restore_input_helper); +static void kgdboc_queue_restore_input_helper(struct irq_work *unused) +{ + schedule_work(&kgdboc_restore_input_work); +} + +static DEFINE_IRQ_WORK(kgdboc_restore_input_irq_work, kgdboc_queue_restore_input_helper); + static void kgdboc_restore_input(void) { if (likely(system_state == SYSTEM_RUNNING)) - schedule_work(&kgdboc_restore_input_work); + irq_work_queue(&kgdboc_restore_input_irq_work); } static int kgdboc_register_kbd(char **cptr) @@ -133,6 +160,7 @@ static void kgdboc_unregister_kbd(void) i--; } } + irq_work_sync(&kgdboc_restore_input_irq_work); flush_work(&kgdboc_restore_input_work); } #else /* ! CONFIG_KDB_KEYBOARD */ diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index 3c92d4e014..8290ab72c0 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -45,6 +45,9 @@ #include #include #include +#include + +#include #include @@ -191,7 +194,7 @@ static void max3100_timeout(struct timer_list *t) static int max3100_sr(struct max3100_port *s, u16 tx, u16 *rx) { struct spi_message message; - u16 etx, erx; + __be16 etx, erx; int status; struct spi_transfer tran = { .tx_buf = &etx, @@ -213,7 +216,7 @@ static int max3100_sr(struct max3100_port *s, u16 tx, u16 *rx) return 0; } -static int max3100_handlerx(struct max3100_port *s, u16 rx) +static int max3100_handlerx_unlocked(struct max3100_port *s, u16 rx) { unsigned int ch, flg, status = 0; int ret = 0, cts; @@ -253,6 +256,17 @@ static int max3100_handlerx(struct max3100_port *s, u16 rx) return ret; } +static int max3100_handlerx(struct max3100_port *s, u16 rx) +{ + unsigned long flags; + int ret; + + uart_port_lock_irqsave(&s->port, &flags); + ret = max3100_handlerx_unlocked(s, rx); + 
uart_port_unlock_irqrestore(&s->port, flags); + return ret; +} + static void max3100_work(struct work_struct *w) { struct max3100_port *s = container_of(w, struct max3100_port, work); @@ -740,13 +754,14 @@ static int max3100_probe(struct spi_device *spi) mutex_lock(&max3100s_lock); if (!uart_driver_registered) { - uart_driver_registered = 1; retval = uart_register_driver(&max3100_uart_driver); if (retval) { printk(KERN_ERR "Couldn't register max3100 uart driver\n"); mutex_unlock(&max3100s_lock); return retval; } + + uart_driver_registered = 1; } for (i = 0; i < MAX_MAX3100; i++) @@ -832,6 +847,7 @@ static int max3100_remove(struct spi_device *spi) } pr_debug("removing max3100 driver\n"); uart_unregister_driver(&max3100_uart_driver); + uart_driver_registered = 0; mutex_unlock(&max3100s_lock); return 0; diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index ac45f3386e..dfedd3f113 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1094,11 +1094,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port, static irqreturn_t mxs_auart_irq_handle(int irq, void *context) { - u32 istat; + u32 istat, stat; struct mxs_auart_port *s = context; u32 mctrl_temp = s->mctrl_prev; - u32 stat = mxs_read(s, REG_STAT); + uart_port_lock(&s->port); + + stat = mxs_read(s, REG_STAT); istat = mxs_read(s, REG_INTR); /* ack irq */ @@ -1134,6 +1136,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context) istat &= ~AUART_INTR_TXIS; } + uart_port_unlock(&s->port); + return IRQ_HANDLED; } diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index 12ce150b0a..b2eadbac01 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -217,7 +217,6 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) { struct tty_port *port; unsigned char ch, r1, drop, flag; - int loops = 0; /* Sanity check, make sure the old bug is no longer happening */ if (uap->port.state == NULL) { 
@@ -298,24 +297,11 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) if (r1 & Rx_OVR) tty_insert_flip_char(port, 0, TTY_OVERRUN); next_char: - /* We can get stuck in an infinite loop getting char 0 when the - * line is in a wrong HW state, we break that here. - * When that happens, I disable the receive side of the driver. - * Note that what I've been experiencing is a real irq loop where - * I'm getting flooded regardless of the actual port speed. - * Something strange is going on with the HW - */ - if ((++loops) > 1000) - goto flood; ch = read_zsreg(uap, R0); if (!(ch & Rx_CH_AV)) break; } - return true; - flood: - pmz_interrupt_control(uap, 0); - pmz_error("pmz: rx irq flood !\n"); return true; } diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 8c09c97f98..35f8675db1 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -25,7 +26,6 @@ #include #include #include -#include #define SC16IS7XX_NAME "sc16is7xx" #define SC16IS7XX_MAX_DEVS 8 @@ -376,9 +376,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen) const u8 line = sc16is7xx_line(port); u8 addr = (SC16IS7XX_RHR_REG << SC16IS7XX_REG_SHIFT) | line; - regcache_cache_bypass(s->regmap, true); - regmap_raw_read(s->regmap, addr, s->buf, rxlen); - regcache_cache_bypass(s->regmap, false); + regmap_noinc_read(s->regmap, addr, s->buf, rxlen); } static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) @@ -394,9 +392,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) if (unlikely(!to_send)) return; - regcache_cache_bypass(s->regmap, true); - regmap_raw_write(s->regmap, addr, s->buf, to_send); - regcache_cache_bypass(s->regmap, false); + regmap_noinc_write(s->regmap, addr, s->buf, to_send); } static void sc16is7xx_port_update(struct uart_port *port, u8 reg, @@ -489,6 +485,11 @@ static bool 
sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) return false; } +static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg) +{ + return reg == SC16IS7XX_RHR_REG; +} + static int sc16is7xx_set_baud(struct uart_port *port, int baud) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); @@ -1439,6 +1440,8 @@ static struct regmap_config regcfg = { .cache_type = REGCACHE_RBTREE, .volatile_reg = sc16is7xx_regmap_volatile, .precious_reg = sc16is7xx_regmap_precious, + .writeable_noinc_reg = sc16is7xx_regmap_noinc, + .readable_noinc_reg = sc16is7xx_regmap_noinc, }; #ifdef CONFIG_SERIAL_SC16IS7XX_SPI diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 45b721abaa..b638b2fd2d 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2410,7 +2410,12 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, port->type = PORT_UNKNOWN; flags |= UART_CONFIG_TYPE; } + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); port->ops->config_port(port, flags); + if (uart_console(port)) + console_unlock(); } if (port->type != PORT_UNKNOWN) { @@ -2418,6 +2423,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, uart_report_port(drv, port); + /* Synchronize with possible boot console. */ + if (uart_console(port)) + console_lock(); + /* Power up port for set_mctrl() */ uart_change_pm(state, UART_PM_STATE_ON); @@ -2434,6 +2443,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, port->rs485_config(port, &port->rs485); spin_unlock_irqrestore(&port->lock, flags); + if (uart_console(port)) + console_unlock(); + /* * If this driver supports console, and it hasn't been * successfully registered yet, try to re-register it. 
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 2531817609..6cd7bd7b67 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1255,9 +1255,14 @@ static void sci_dma_rx_chan_invalidate(struct sci_port *s) static void sci_dma_rx_release(struct sci_port *s) { struct dma_chan *chan = s->chan_rx_saved; + struct uart_port *port = &s->port; + unsigned long flags; + uart_port_lock_irqsave(port, &flags); s->chan_rx_saved = NULL; sci_dma_rx_chan_invalidate(s); + uart_port_unlock_irqrestore(port, flags); + dmaengine_terminate_sync(chan); dma_free_coherent(chan->device->dev, s->buf_len_rx * 2, s->rx_buf[0], sg_dma_address(&s->sg_rx[0])); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 442579f087..3dc2124bd0 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -398,7 +398,7 @@ static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr) char32_t *ln = uniscr->lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; - memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); + memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); memset32(&ln[cols - nr], ' ', nr); } } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 2cd0089660..7bb3f81ac3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -116,7 +116,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 -static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, @@ -678,14 +677,14 @@ static void kick_hub_wq(struct usb_hub *hub) */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); - kref_get(&hub->kref); + hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ 
usb_autopm_put_interface_async(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) @@ -1053,7 +1052,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } - kref_get(&hub->kref); + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1301,7 +1300,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } - kref_put(&hub->kref, hub_release); + hub_put(hub); } /* Implement the continuations for the delays above */ @@ -1717,6 +1716,16 @@ static void hub_release(struct kref *kref) kfree(hub); } +void hub_get(struct usb_hub *hub) +{ + kref_get(&hub->kref); +} + +void hub_put(struct usb_hub *hub) +{ + kref_put(&hub->kref, hub_release); +} + static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) @@ -1763,7 +1772,7 @@ static void hub_disconnect(struct usb_interface *intf) if (hub->quirk_disable_autosuspend) usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) @@ -5040,9 +5049,10 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, } if (usb_endpoint_maxp(&udev->ep0.desc) == i) { ; /* Initial ep0 maxpacket guess is right */ - } else if ((udev->speed == USB_SPEED_FULL || + } else if (((udev->speed == USB_SPEED_FULL || udev->speed == USB_SPEED_HIGH) && - (i == 8 || i == 16 || i == 32 || i == 64)) { + (i == 8 || i == 16 || i == 32 || i == 64)) || + (udev->speed >= USB_SPEED_SUPER && i > 0)) { /* Initial guess is wrong; use the descriptor's value */ if (udev->speed == USB_SPEED_FULL) dev_dbg(&udev->dev, "ep0 maxpacket = %d\n", i); @@ -5853,7 +5863,7 @@ static void hub_event(struct work_struct *work) /* Balance the stuff in kick_hub_wq() and allow autosuspend */ 
usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); kcov_remote_stop(); } diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index db4c7e2c59..dd049bc85f 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -117,6 +117,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub, extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); +extern void hub_get(struct usb_hub *hub); +extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index dfcca9c876..2d87a4681e 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -295,8 +295,10 @@ static void usb_port_shutdown(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); - if (port_dev->child) + if (port_dev->child) { usb_disable_usb2_hardware_lpm(port_dev->child); + usb_unlocked_disable_lpm(port_dev->child); + } } static const struct dev_pm_ops usb_port_pm_ops = { @@ -450,7 +452,7 @@ static int match_location(struct usb_device *peer_hdev, void *p) struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); - if (!peer_hub) + if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED) return 0; hcd = bus_to_hcd(hdev->bus); @@ -461,7 +463,8 @@ static int match_location(struct usb_device *peer_hdev, void *p) for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; - if (peer && peer->location == port_dev->location) { + if (peer && peer->connect_type != USB_PORT_NOT_USED && + peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } diff --git a/drivers/usb/core/sysfs.c 
b/drivers/usb/core/sysfs.c index 60ee0469d8..c9501907f7 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -1169,14 +1169,24 @@ static ssize_t interface_authorized_store(struct device *dev, { struct usb_interface *intf = to_usb_interface(dev); bool val; + struct kernfs_node *kn; if (strtobool(buf, &val) != 0) return -EINVAL; - if (val) + if (val) { usb_authorize_interface(intf); - else - usb_deauthorize_interface(intf); + } else { + /* + * Prevent deadlock if another process is concurrently + * trying to unregister intf. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (kn) { + usb_deauthorize_interface(intf); + sysfs_unbreak_active_protection(kn); + } + } return count; } diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 71e62b3081..cbf975b75d 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -755,8 +755,14 @@ struct dwc2_dregs_backup { * struct dwc2_hregs_backup - Holds host registers state before * entering partial power down * @hcfg: Backup of HCFG register + * @hflbaddr: Backup of HFLBADDR register * @haintmsk: Backup of HAINTMSK register + * @hcchar: Backup of HCCHAR register + * @hcsplt: Backup of HCSPLT register * @hcintmsk: Backup of HCINTMSK register + * @hctsiz: Backup of HCTSIZ register + * @hdma: Backup of HCDMA register + * @hcdmab: Backup of HCDMAB register * @hprt0: Backup of HPTR0 register * @hfir: Backup of HFIR register * @hptxfsiz: Backup of HPTXFSIZ register @@ -764,8 +770,14 @@ struct dwc2_dregs_backup { */ struct dwc2_hregs_backup { u32 hcfg; + u32 hflbaddr; u32 haintmsk; + u32 hcchar[MAX_EPS_CHANNELS]; + u32 hcsplt[MAX_EPS_CHANNELS]; u32 hcintmsk[MAX_EPS_CHANNELS]; + u32 hctsiz[MAX_EPS_CHANNELS]; + u32 hcidma[MAX_EPS_CHANNELS]; + u32 hcidmab[MAX_EPS_CHANNELS]; u32 hprt0; u32 hfir; u32 hptxfsiz; @@ -1107,6 +1119,7 @@ struct dwc2_hsotg { bool needs_byte_swap; /* DWC OTG HW Release versions */ +#define DWC2_CORE_REV_4_30a 0x4f54430a #define DWC2_CORE_REV_2_71a 
0x4f54271a #define DWC2_CORE_REV_2_72a 0x4f54272a #define DWC2_CORE_REV_2_80a 0x4f54280a @@ -1345,6 +1358,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg); void dwc2_enable_acg(struct dwc2_hsotg *hsotg); +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup); /* This function should be called on every hardware interrupt. */ irqreturn_t dwc2_handle_common_intr(int irq, void *dev); diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index a5c52b237e..5db14b1a5c 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -327,7 +327,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. */ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } @@ -352,10 +353,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg) * @hsotg: Programming view of DWC_otg controller * */ -static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) +void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup) { u32 glpmcfg; - u32 i = 0; + u32 pcgctl; + u32 dctl; if (hsotg->lx_state != DWC2_L1) { dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n"); @@ -364,37 +366,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg) glpmcfg = dwc2_readl(hsotg, GLPMCFG); if (dwc2_is_device_mode(hsotg)) { - dev_dbg(hsotg->dev, "Exit from L1 state\n"); + dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup); glpmcfg &= ~GLPMCFG_ENBLSLPM; - glpmcfg &= ~GLPMCFG_HIRD_THRES_EN; + glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK; dwc2_writel(hsotg, glpmcfg, GLPMCFG); - do { - glpmcfg = dwc2_readl(hsotg, GLPMCFG); + pcgctl = dwc2_readl(hsotg, PCGCTL); + pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING; + dwc2_writel(hsotg, 
pcgctl, PCGCTL); - if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK | - GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS))) - break; + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_ENBESL) { + glpmcfg |= GLPMCFG_RSTRSLPSTS; + dwc2_writel(hsotg, glpmcfg, GLPMCFG); + } + + if (remotewakeup) { + if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__); + goto fail; + return; + } + + dctl = dwc2_readl(hsotg, DCTL); + dctl |= DCTL_RMTWKUPSIG; + dwc2_writel(hsotg, dctl, DCTL); - udelay(1); - } while (++i < 200); + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__); + goto fail; + return; + } + } - if (i == 200) { - dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n"); + glpmcfg = dwc2_readl(hsotg, GLPMCFG); + if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS || + glpmcfg & GLPMCFG_L1RESUMEOK) { + goto fail; return; } - dwc2_gadget_init_lpm(hsotg); + + /* Inform gadget to exit from L1 */ + call_gadget(hsotg, resume); + /* Change to L0 state */ + hsotg->lx_state = DWC2_L0; + hsotg->bus_suspended = false; +fail: dwc2_gadget_init_lpm(hsotg); } else { /* TODO */ dev_err(hsotg->dev, "Host side LPM is not supported.\n"); return; } - - /* Change to L0 state */ - hsotg->lx_state = DWC2_L0; - - /* Inform gadget to exit from L1 */ - call_gadget(hsotg, resume); } /* @@ -415,7 +437,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state); if (hsotg->lx_state == DWC2_L1) { - dwc2_wakeup_from_lpm_l1(hsotg); + dwc2_wakeup_from_lpm_l1(hsotg, false); return; } @@ -438,7 +460,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) /* Exit gadget mode clock gating. 
*/ if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_gadget_exit_clock_gating(hsotg, 0); } else { /* Change to L0 state */ @@ -455,7 +478,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg) } if (hsotg->params.power_down == - DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended) + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) dwc2_host_exit_clock_gating(hsotg, 1); /* diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 519bb82b00..f6b2a4f2e5 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1416,6 +1416,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, ep->name, req, req->length, req->buf, req->no_interrupt, req->zero, req->short_not_ok); + if (hs->lx_state == DWC2_L1) { + dwc2_wakeup_from_lpm_l1(hs, true); + } + /* Prevent new request submission when controller is suspended */ if (hs->lx_state != DWC2_L0) { dev_dbg(hs->dev, "%s: submit request only in active state\n", @@ -3728,6 +3732,12 @@ static irqreturn_t dwc2_hsotg_irq(int irq, void *pw) if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2) dwc2_exit_partial_power_down(hsotg, 0, true); + /* Exit gadget mode clock gating. 
*/ + if (hsotg->params.power_down == + DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && + !hsotg->params.no_clock_gating) + dwc2_gadget_exit_clock_gating(hsotg, 0); + hsotg->lx_state = DWC2_L0; } diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index d17a1dd6d0..f6195f44e5 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -2734,8 +2734,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry); - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the periodic ready schedule to the @@ -2768,8 +2771,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions( hsotg->available_host_channels--; } - if (dwc2_assign_and_init_hc(hsotg, qh)) + if (dwc2_assign_and_init_hc(hsotg, qh)) { + if (hsotg->params.uframe_sched) + hsotg->available_host_channels++; break; + } /* * Move the QH from the non-periodic inactive schedule to the @@ -4176,6 +4182,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd, urb->actual_length); if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + if (!hsotg->params.dma_desc_enable) + urb->start_frame = qtd->qh->start_active_frame; urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb); for (i = 0; i < urb->number_of_packets; ++i) { urb->iso_frame_desc[i].actual_length = @@ -4682,7 +4690,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, } if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else @@ -5437,9 +5445,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg) /* Backup Host regs */ hr = &hsotg->hr_backup; hr->hcfg = dwc2_readl(hsotg, HCFG); + hr->hflbaddr = 
dwc2_readl(hsotg, HFLBADDR); hr->haintmsk = dwc2_readl(hsotg, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i)); + hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i)); hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i)); + hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i)); + hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i)); + hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i)); + } hr->hprt0 = dwc2_read_hprt0(hsotg); hr->hfir = dwc2_readl(hsotg, HFIR); @@ -5473,10 +5488,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg) hr->valid = false; dwc2_writel(hsotg, hr->hcfg, HCFG); + dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR); dwc2_writel(hsotg, hr->haintmsk, HAINTMSK); - for (i = 0; i < hsotg->params.host_channels; ++i) + for (i = 0; i < hsotg->params.host_channels; ++i) { + dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i)); + dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i)); dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i)); + dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i)); + dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i)); + dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i)); + } dwc2_writel(hsotg, hr->hprt0, HPRT0); dwc2_writel(hsotg, hr->hfir, HFIR); @@ -5641,10 +5663,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, dwc2_writel(hsotg, hr->hcfg, HCFG); /* De-assert Wakeup Logic */ - gpwrdn = dwc2_readl(hsotg, GPWRDN); - gpwrdn &= ~GPWRDN_PMUACTV; - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } hprt0 = hr->hprt0; hprt0 |= HPRT0_PWR; @@ -5669,6 +5693,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, hprt0 |= HPRT0_RES; dwc2_writel(hsotg, hprt0, HPRT0); + /* De-assert Wakeup Logic */ + if ((rem_wakeup && hsotg->hw_params.snpsid >= 
DWC2_CORE_REV_4_30a)) { + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn &= ~GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } /* Wait for Resume time and then program HPRT again */ mdelay(100); hprt0 &= ~HPRT0_RES; diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index a858b5f9c1..d6fa02d851 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -589,7 +589,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg, idx = qh->td_last; inc = qh->host_interval; hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg); - cur_idx = dwc2_frame_list_idx(hsotg->frame_number); + cur_idx = idx; next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed); /* @@ -896,20 +896,27 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, { struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; + u16 frame_desc_idx; + struct urb *usb_urb; u16 remain = 0; int rc = 0; if (!qtd->urb) return -EINVAL; + usb_urb = qtd->urb->priv; + dma_sync_single_for_cpu(hsotg->dev, qh->desc_list_dma + (idx * sizeof(struct dwc2_dma_desc)), sizeof(struct dwc2_dma_desc), DMA_FROM_DEVICE); dma_desc = &qh->desc_list[idx]; + frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1); - frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last]; + frame_desc = &qtd->urb->iso_descs[frame_desc_idx]; + if (idx == qtd->isoc_td_first) + usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg); dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset); if (chan->ep_is_in) remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >> @@ -930,7 +937,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, frame_desc->status = 0; } - if (++qtd->isoc_frame_index == qtd->urb->packet_count) { + if (++qtd->isoc_frame_index == usb_urb->number_of_packets) { /* * urb->status is not used for isoc transfers here. The * individual frame_desc status are used instead. 
@@ -1035,11 +1042,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg, return; idx = dwc2_desclist_idx_inc(idx, qh->host_interval, chan->speed); - if (!rc) + if (rc == 0) continue; - if (rc == DWC2_CMPL_DONE) - break; + if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP) + goto stop_scan; /* rc == DWC2_CMPL_STOP */ diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h index 6b16fbf98b..5a07626989 100644 --- a/drivers/usb/dwc2/hw.h +++ b/drivers/usb/dwc2/hw.h @@ -728,7 +728,7 @@ #define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) #define TXSTS_QTOP_TOKEN_SHIFT 25 #define TXSTS_QTOP_TERMINATE BIT(24) -#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_MASK (0x7f << 16) #define TXSTS_QSPCAVAIL_SHIFT 16 #define TXSTS_FSPCAVAIL_MASK (0xffff << 0) #define TXSTS_FSPCAVAIL_SHIFT 0 diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 6496bfbd34..79094384d8 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -344,7 +344,7 @@ static int dwc2_driver_remove(struct platform_device *dev) /* Exit clock gating when driver is removed. 
*/ if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && - hsotg->bus_suspended) { + hsotg->bus_suspended && !hsotg->params.no_clock_gating) { if (dwc2_is_device_mode(hsotg)) dwc2_gadget_exit_clock_gating(hsotg, 0); else diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 0f0269d28c..a469d05247 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -102,6 +102,27 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc) return 0; } +void dwc3_enable_susphy(struct dwc3 *dwc, bool enable) +{ + u32 reg; + + reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); + if (enable && !dwc->dis_u3_susphy_quirk) + reg |= DWC3_GUSB3PIPECTL_SUSPHY; + else + reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; + + dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg); + + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (enable && !dwc->dis_u2_susphy_quirk) + reg |= DWC3_GUSB2PHYCFG_SUSPHY; + else + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); +} + void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) { u32 reg; @@ -593,11 +614,8 @@ static int dwc3_core_ulpi_init(struct dwc3 *dwc) */ static int dwc3_phy_setup(struct dwc3 *dwc) { - unsigned int hw_mode; u32 reg; - hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); - reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); /* @@ -607,21 +625,16 @@ static int dwc3_phy_setup(struct dwc3 *dwc) reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX; /* - * Above 1.94a, it is recommended to set DWC3_GUSB3PIPECTL_SUSPHY - * to '0' during coreConsultant configuration. So default value - * will be '0' when the core is reset. Application needs to set it - * to '1' after the core initialization is completed. + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB3PIPECTL_SUSPHY to '0' during coreConsultant configuration. + * So default value will be '0' when the core is reset. Application + * needs to set it to '1' after the core initialization is completed. 
+ * + * Similarly for DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be + * cleared after power-on reset, and it can be set after core + * initialization. */ - if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) - reg |= DWC3_GUSB3PIPECTL_SUSPHY; - - /* - * For DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be cleared after - * power-on reset, and it can be set after core initialization, which is - * after device soft-reset during initialization. - */ - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD) - reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; + reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; if (dwc->u2ss_inp3_quirk) reg |= DWC3_GUSB3PIPECTL_U2SSINP3OK; @@ -647,9 +660,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc) if (dwc->tx_de_emphasis_quirk) reg |= DWC3_GUSB3PIPECTL_TX_DEEPH(dwc->tx_de_emphasis); - if (dwc->dis_u3_susphy_quirk) - reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; - if (dwc->dis_del_phy_power_chg_quirk) reg &= ~DWC3_GUSB3PIPECTL_DEPOCHANGE; @@ -697,24 +707,15 @@ static int dwc3_phy_setup(struct dwc3 *dwc) } /* - * Above 1.94a, it is recommended to set DWC3_GUSB2PHYCFG_SUSPHY to - * '0' during coreConsultant configuration. So default value will - * be '0' when the core is reset. Application needs to set it to - * '1' after the core initialization is completed. + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB2PHYCFG_SUSPHY to '0' during coreConsultant configuration. + * So default value will be '0' when the core is reset. Application + * needs to set it to '1' after the core initialization is completed. + * + * Similarly for DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared + * after power-on reset, and it can be set after core initialization. */ - if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) - reg |= DWC3_GUSB2PHYCFG_SUSPHY; - - /* - * For DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared after - * power-on reset, and it can be set after core initialization, which is - * after device soft-reset during initialization. 
- */ - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD) - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - - if (dwc->dis_u2_susphy_quirk) - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; if (dwc->dis_enblslpm_quirk) reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; @@ -996,21 +997,6 @@ static int dwc3_core_init(struct dwc3 *dwc) if (ret) goto err1; - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD && - !DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) { - if (!dwc->dis_u3_susphy_quirk) { - reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); - reg |= DWC3_GUSB3PIPECTL_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg); - } - - if (!dwc->dis_u2_susphy_quirk) { - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - reg |= DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - } - dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index d64f7edc70..8c8e17cc13 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1517,6 +1517,7 @@ int dwc3_event_buffers_setup(struct dwc3 *dwc); void dwc3_event_buffers_cleanup(struct dwc3 *dwc); int dwc3_core_soft_reset(struct dwc3 *dwc); +void dwc3_enable_susphy(struct dwc3 *dwc, bool enable); #if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) int dwc3_host_init(struct dwc3 *dwc); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 86cf3b2b66..af35278a5e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2775,6 +2775,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); + dwc3_enable_susphy(dwc, true); return 0; @@ -4512,6 +4513,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc) if (!dwc->gadget) return; + dwc3_enable_susphy(dwc, false); usb_del_gadget(dwc->gadget); dwc3_gadget_free_endpoints(dwc); usb_put_gadget(dwc->gadget); diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 012b54cb84..9adcf3a7e9 100644 --- 
a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -9,9 +9,30 @@ #include #include +#include +#include +#include "../host/xhci-plat.h" #include "core.h" +static void dwc3_xhci_plat_start(struct usb_hcd *hcd) +{ + struct platform_device *pdev; + struct dwc3 *dwc; + + if (!usb_hcd_is_primary_hcd(hcd)) + return; + + pdev = to_platform_device(hcd->self.controller); + dwc = dev_get_drvdata(pdev->dev.parent); + + dwc3_enable_susphy(dwc, true); +} + +static const struct xhci_plat_priv dwc3_xhci_plat_quirk = { + .plat_start = dwc3_xhci_plat_start, +}; + static int dwc3_host_get_irq(struct dwc3 *dwc) { struct platform_device *dwc3_pdev = to_platform_device(dwc->dev); @@ -117,6 +138,11 @@ int dwc3_host_init(struct dwc3 *dwc) } } + ret = platform_device_add_data(xhci, &dwc3_xhci_plat_quirk, + sizeof(struct xhci_plat_priv)); + if (ret) + goto err; + ret = platform_device_add(xhci); if (ret) { dev_err(dwc->dev, "failed to register xHCI device\n"); @@ -131,6 +157,7 @@ int dwc3_host_init(struct dwc3 *dwc) void dwc3_host_exit(struct dwc3 *dwc) { + dwc3_enable_susphy(dwc, false); platform_device_unregister(dwc->xhci); dwc->xhci = NULL; } diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index edce0a1bdd..3f035e905b 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1978,7 +1978,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) buf[5] = 0x01; switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_DEVICE: - if (w_index != 0x4 || (w_value >> 8)) + if (w_index != 0x4 || (w_value & 0xff)) break; buf[6] = w_index; /* Number of ext compat interfaces */ @@ -1994,9 +1994,9 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) } break; case USB_RECIP_INTERFACE: - if (w_index != 0x5 || (w_value >> 8)) + if (w_index != 0x5 || (w_value & 0xff)) break; - interface = w_value & 0xFF; + interface = w_value >> 8; if (interface >= MAX_CONFIG_INTERFACES || 
!os_desc_cfg->interface[interface]) break; diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 73ad9c3acc..a4367a43cd 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3414,7 +3414,7 @@ static int ffs_func_setup(struct usb_function *f, __ffs_event_add(ffs, FUNCTIONFS_SETUP); spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags); - return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; + return ffs->ev.setup.wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; } static bool ffs_func_req_match(struct usb_function *f, diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 537f7a729f..00995d65b5 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -888,7 +888,7 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (alt > 1) goto fail; - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { DBG(cdev, "reset ncm\n"); ncm->netdev = NULL; gether_disconnect(&ncm->port); @@ -1352,7 +1352,7 @@ static int ncm_unwrap_ntb(struct gether *port, if (to_process == 1 && (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { to_process--; - } else if (to_process > 0) { + } else if ((to_process > 0) && (block_len != 0)) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } @@ -1373,7 +1373,7 @@ static void ncm_disable(struct usb_function *f) DBG(cdev, "ncm deactivated\n"); - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { ncm->netdev = NULL; gether_disconnect(&ncm->port); } diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 200eb788a7..5e34a7ff1b 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -1172,6 +1172,8 @@ void g_audio_cleanup(struct g_audio *g_audio) return; uac = g_audio->uac; + g_audio->uac = NULL; + card = uac->card; if (card) snd_card_free_when_closed(card); diff --git 
a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 4f50a8e18d..808a8d062e 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -275,7 +275,9 @@ int usb_ep_queue(struct usb_ep *ep, { int ret = 0; - if (WARN_ON_ONCE(!ep->enabled && ep->address)) { + if (!ep->enabled && ep->address) { + pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n", + ep->address, ep->name); ret = -ESHUTDOWN; goto out; } diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 52996bf2cc..fdbb9d73aa 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3480,8 +3480,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc) static int tegra_xudc_phy_get(struct tegra_xudc *xudc) { - int err = 0, usb3; - unsigned int i; + int err = 0, usb3_companion_port; + unsigned int i, j; xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys, sizeof(*xudc->utmi_phy), GFP_KERNEL); @@ -3509,7 +3509,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) if (IS_ERR(xudc->utmi_phy[i])) { err = PTR_ERR(xudc->utmi_phy[i]); dev_err_probe(xudc->dev, err, - "failed to get usb2-%d PHY\n", i); + "failed to get PHY for phy-name usb2-%d\n", i); goto clean_up; } else if (xudc->utmi_phy[i]) { /* Get usb-phy, if utmi phy is available */ @@ -3528,19 +3528,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc) } /* Get USB3 phy */ - usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); - if (usb3 < 0) + usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i); + if (usb3_companion_port < 0) continue; - snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3); - xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); - if (IS_ERR(xudc->usb3_phy[i])) { - err = PTR_ERR(xudc->usb3_phy[i]); - dev_err_probe(xudc->dev, err, - "failed to get usb3-%d PHY\n", usb3); - goto clean_up; - } else if (xudc->usb3_phy[i]) - dev_dbg(xudc->dev, "usb3-%d 
PHY registered", usb3); + for (j = 0; j < xudc->soc->num_phys; j++) { + snprintf(phy_name, sizeof(phy_name), "usb3-%d", j); + xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name); + if (IS_ERR(xudc->usb3_phy[i])) { + err = PTR_ERR(xudc->usb3_phy[i]); + dev_err_probe(xudc->dev, err, + "failed to get PHY for phy-name usb3-%d\n", j); + goto clean_up; + } else if (xudc->usb3_phy[i]) { + int usb2_port = + tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]); + int usb3_port = + tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]); + if (usb3_port == usb3_companion_port) { + dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n", + usb2_port, usb3_port, i); + break; + } + } + } } return err; diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 1f5e69314a..90185d1df2 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -890,6 +890,7 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd) /* Check for an all 1's result which is a typical consequence * of dead, unclocked, or unplugged (CardBus...) 
devices */ +again: if (ints == ~(u32)0) { ohci->rh_state = OHCI_RH_HALTED; ohci_dbg (ohci, "device removed!\n"); @@ -984,6 +985,13 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd) } spin_unlock(&ohci->lock); + /* repeat until all enabled interrupts are handled */ + if (ohci->rh_state != OHCI_RH_HALTED) { + ints = ohci_readl(ohci, &regs->intrstatus); + if (ints && (ints & ohci_readl(ohci, &regs->intrenable))) + goto again; + } + return IRQ_HANDLED; } diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index 825ff67273..d3de9f4755 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -585,6 +585,7 @@ done(struct sl811 *sl811, struct sl811h_ep *ep, u8 bank) finish_request(sl811, ep, urb, urbstat); } +#ifdef QUIRK2 static inline u8 checkdone(struct sl811 *sl811) { u8 ctl; @@ -616,6 +617,7 @@ static inline u8 checkdone(struct sl811 *sl811) #endif return irqstat; } +#endif static irqreturn_t sl811h_irq(struct usb_hcd *hcd) { diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 561d0b7bce..29f15298e3 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -8,7 +8,9 @@ #ifndef _XHCI_PLAT_H #define _XHCI_PLAT_H -#include "xhci.h" /* for hcd_to_xhci() */ +struct device; +struct platform_device; +struct usb_hcd; struct xhci_plat_priv { const char *firmware_name; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 5c9d3be136..2539d97b90 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1324,6 +1324,8 @@ static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb) temp = kzalloc_node(buf_len, GFP_ATOMIC, dev_to_node(hcd->self.sysdev)); + if (!temp) + return -ENOMEM; if (usb_urb_dir_out(urb)) sg_pcopy_to_buffer(urb->sg, urb->num_sgs, diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 34b9f81401..661a229c10 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -268,13 +268,6 @@ int 
usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return -EPROBE_DEFER; } - nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); - if (PTR_ERR(nop->vbus_draw) == -ENODEV) - nop->vbus_draw = NULL; - if (IS_ERR(nop->vbus_draw)) - return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), - "could not get vbus regulator\n"); - nop->dev = dev; nop->phy.dev = nop->dev; nop->phy.label = "nop-xceiv"; diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f47c2f3922..4183942a1c 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -56,6 +56,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ + { USB_DEVICE(0x04BF, 0x1301) }, /* TDK Corporation NC0110013M - Network Controller */ + { USB_DEVICE(0x04BF, 0x1303) }, /* TDK Corporation MM0110113M - i3 Micro Module */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ @@ -144,6 +146,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ + { USB_DEVICE(0x10C4, 0x863C) }, /* MGP Instruments PDS100 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ @@ -177,6 +180,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon 
Labs MobiData GPRS USB Modem */ { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ + { USB_DEVICE(0x11CA, 0x0212) }, /* Verifone USB to Printer (UART, CP2102) */ { USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */ { USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 2345208b72..1915b92c38 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1055,6 +1055,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_UNBUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + /* GMC devices */ + { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 9a0f9fc991..b2aec11066 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1599,3 +1599,9 @@ #define UBLOX_VID 0x1546 #define UBLOX_C099F9P_ZED_PID 0x0502 #define UBLOX_C099F9P_ODIN_PID 0x0503 + +/* + * GMC devices + */ +#define GMC_VID 0x1cd7 +#define GMC_Z216C_PID 0x0217 /* GMC Z216C Adapter IR-USB */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c0a0cca654..b5ee8518fc 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,10 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_EM060K_128 0x0128 +#define QUECTEL_PRODUCT_EM060K_129 0x0129 +#define QUECTEL_PRODUCT_EM060K_12a 0x012a +#define QUECTEL_PRODUCT_EM060K_12b 0x012b +#define QUECTEL_PRODUCT_EM060K_12c 0x012c #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 @@ -613,6 +617,11 @@ static void 
option_instat_callback(struct urb *urb); /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 +/* MeiG Smart Technology products */ +#define MEIGSMART_VENDOR_ID 0x2dee +/* MeiG Smart SLM320 based on UNISOC UIS8910 */ +#define MEIGSMART_PRODUCT_SLM320 0x4d41 + /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ @@ -1213,6 +1222,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 
0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, @@ -1355,6 +1376,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -2047,6 +2074,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff), .driver_info = RSVD(4) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b05), /* Longsung U8300 */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b3c), /* Longsung U9300 */ + .driver_info = RSVD(0) | RSVD(4) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, @@ -2267,21 +2298,36 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom 
FM160 (MBIM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a04, 0xff) }, /* Fibocom FM650-CN (ECM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ + { USB_DEVICE(0x33f8, 0x0104), /* Rolling RW101-GL (laptop RMNET) */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a2, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a3, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a4, 0xff), /* Rolling RW101-GL (laptop MBIM) */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 
0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index 3c76336e43..1d22db59f7 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1105,7 +1105,7 @@ static void isd200_dump_driveid(struct us_data *us, u16 *id) static int isd200_get_inquiry_data( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; - int retStatus = ISD200_GOOD; + int retStatus; u16 *id = info->id; usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n"); @@ -1137,6 +1137,13 @@ static int isd200_get_inquiry_data( struct us_data *us ) isd200_fix_driveid(id); isd200_dump_driveid(us, id); + /* Prevent division by 0 in isd200_scsi_to_ata() */ + if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) { + usb_stor_dbg(us, " Invalid ATA Identify data\n"); + retStatus = ISD200_ERROR; + goto Done; + } + memset(&info->InquiryData, 0, sizeof(info->InquiryData)); /* Standard IDE interface only supports disks */ @@ -1202,6 +1209,7 @@ static int isd200_get_inquiry_data( struct us_data *us ) } } + Done: usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus); return(retStatus); @@ -1481,22 +1489,27 @@ static int isd200_init_info(struct us_data *us) static int isd200_Initialization(struct us_data *us) { + int rc = 0; + usb_stor_dbg(us, "ISD200 Initialization...\n"); /* Initialize ISD200 info struct */ - if (isd200_init_info(us) == ISD200_ERROR) { + if (isd200_init_info(us) < 0) { usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n"); + rc = -ENOMEM; } else { /* Get device specific 
data */ - if (isd200_get_inquiry_data(us) != ISD200_GOOD) + if (isd200_get_inquiry_data(us) != ISD200_GOOD) { usb_stor_dbg(us, "ISD200 Initialization Failure\n"); - else + rc = -EINVAL; + } else { usb_stor_dbg(us, "ISD200 Initialization complete\n"); + } } - return 0; + return rc; } diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index b8e1109f0e..e104aa651c 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -393,7 +393,6 @@ static int queuecommand_lck(struct scsi_cmnd *srb, } /* enqueue the command and wake up the control thread */ - srb->scsi_done = done; us->srb = srb; complete(&us->cmnd_ready); diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index bef89c6bd1..11a551a9cd 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -113,7 +113,7 @@ static void uas_do_work(struct work_struct *work) continue; cmnd = devinfo->cmnd[i]; - cmdinfo = (void *)&cmnd->SCp; + cmdinfo = scsi_cmd_priv(cmnd); if (!(cmdinfo->state & IS_IN_WORK_LIST)) continue; @@ -139,10 +139,9 @@ static void uas_scan_work(struct work_struct *work) dev_dbg(&devinfo->intf->dev, "scan complete\n"); } -static void uas_add_work(struct uas_cmd_info *cmdinfo) +static void uas_add_work(struct scsi_cmnd *cmnd) { - struct scsi_pointer *scp = (void *)cmdinfo; - struct scsi_cmnd *cmnd = container_of(scp, struct scsi_cmnd, SCp); + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); struct uas_dev_info *devinfo = cmnd->device->hostdata; lockdep_assert_held(&devinfo->lock); @@ -163,7 +162,7 @@ static void uas_zap_pending(struct uas_dev_info *devinfo, int result) continue; cmnd = devinfo->cmnd[i]; - cmdinfo = (void *)&cmnd->SCp; + cmdinfo = scsi_cmd_priv(cmnd); uas_log_cmd_state(cmnd, __func__, 0); /* Sense urbs were killed, clear COMMAND_INFLIGHT manually */ cmdinfo->state &= ~COMMAND_INFLIGHT; @@ -200,15 +199,14 @@ static void uas_sense(struct urb *urb, struct scsi_cmnd *cmnd) static void uas_log_cmd_state(struct 
scsi_cmnd *cmnd, const char *prefix, int status) { - struct uas_cmd_info *ci = (void *)&cmnd->SCp; - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *ci = scsi_cmd_priv(cmnd); if (status == -ENODEV) /* too late */ return; scmd_printk(KERN_INFO, cmnd, "%s %d uas-tag %d inflight:%s%s%s%s%s%s%s%s%s%s%s%s ", - prefix, status, cmdinfo->uas_tag, + prefix, status, ci->uas_tag, (ci->state & SUBMIT_STATUS_URB) ? " s-st" : "", (ci->state & ALLOC_DATA_IN_URB) ? " a-in" : "", (ci->state & SUBMIT_DATA_IN_URB) ? " s-in" : "", @@ -231,7 +229,7 @@ static void uas_free_unsubmitted_urbs(struct scsi_cmnd *cmnd) if (!cmnd) return; - cmdinfo = (void *)&cmnd->SCp; + cmdinfo = scsi_cmd_priv(cmnd); if (cmdinfo->state & SUBMIT_CMD_URB) usb_free_urb(cmdinfo->cmd_urb); @@ -245,7 +243,7 @@ static void uas_free_unsubmitted_urbs(struct scsi_cmnd *cmnd) static int uas_try_complete(struct scsi_cmnd *cmnd, const char *caller) { - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); struct uas_dev_info *devinfo = (void *)cmnd->device->hostdata; lockdep_assert_held(&devinfo->lock); @@ -256,20 +254,20 @@ static int uas_try_complete(struct scsi_cmnd *cmnd, const char *caller) return -EBUSY; devinfo->cmnd[cmdinfo->uas_tag - 1] = NULL; uas_free_unsubmitted_urbs(cmnd); - cmnd->scsi_done(cmnd); + scsi_done(cmnd); return 0; } static void uas_xfer_data(struct urb *urb, struct scsi_cmnd *cmnd, unsigned direction) { - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); int err; cmdinfo->state |= direction | SUBMIT_STATUS_URB; err = uas_submit_urbs(cmnd, cmnd->device->hostdata); if (err) { - uas_add_work(cmdinfo); + uas_add_work(cmnd); } } @@ -329,7 +327,7 @@ static void uas_stat_cmplt(struct urb *urb) } cmnd = devinfo->cmnd[idx]; - cmdinfo = (void *)&cmnd->SCp; + cmdinfo = scsi_cmd_priv(cmnd); if (!(cmdinfo->state & COMMAND_INFLIGHT)) { uas_log_cmd_state(cmnd, "unexpected status 
cmplt", 0); @@ -394,7 +392,7 @@ static void uas_stat_cmplt(struct urb *urb) static void uas_data_cmplt(struct urb *urb) { struct scsi_cmnd *cmnd = urb->context; - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); struct uas_dev_info *devinfo = (void *)cmnd->device->hostdata; struct scsi_data_buffer *sdb = &cmnd->sdb; unsigned long flags; @@ -446,7 +444,7 @@ static struct urb *uas_alloc_data_urb(struct uas_dev_info *devinfo, gfp_t gfp, enum dma_data_direction dir) { struct usb_device *udev = devinfo->udev; - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); struct urb *urb = usb_alloc_urb(0, gfp); struct scsi_data_buffer *sdb = &cmnd->sdb; unsigned int pipe = (dir == DMA_FROM_DEVICE) @@ -468,7 +466,7 @@ static struct urb *uas_alloc_sense_urb(struct uas_dev_info *devinfo, gfp_t gfp, struct scsi_cmnd *cmnd) { struct usb_device *udev = devinfo->udev; - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); struct urb *urb = usb_alloc_urb(0, gfp); struct sense_iu *iu; @@ -496,7 +494,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp, { struct usb_device *udev = devinfo->udev; struct scsi_device *sdev = cmnd->device; - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); struct urb *urb = usb_alloc_urb(0, gfp); struct command_iu *iu; int len; @@ -535,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp, * daft to me. 
*/ -static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) +static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) { struct uas_dev_info *devinfo = cmnd->device->hostdata; struct urb *urb; @@ -543,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp) urb = uas_alloc_sense_urb(devinfo, gfp, cmnd); if (!urb) - return NULL; + return -ENOMEM; usb_anchor_urb(urb, &devinfo->sense_urbs); err = usb_submit_urb(urb, gfp); if (err) { usb_unanchor_urb(urb); uas_log_cmd_state(cmnd, "sense submit err", err); usb_free_urb(urb); - return NULL; } - return urb; + return err; } static int uas_submit_urbs(struct scsi_cmnd *cmnd, struct uas_dev_info *devinfo) { - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; - struct urb *urb; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); int err; lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & SUBMIT_STATUS_URB) { - urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC); - if (!urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + err = uas_submit_sense_urb(cmnd, GFP_ATOMIC); + if (err) + return err; cmdinfo->state &= ~SUBMIT_STATUS_URB; } @@ -574,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_FROM_DEVICE); if (!cmdinfo->data_in_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_IN_URB; } @@ -584,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_in_urb); uas_log_cmd_state(cmnd, "data in submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_IN_URB; cmdinfo->state |= DATA_IN_URB_INFLIGHT; @@ -594,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC, cmnd, DMA_TO_DEVICE); if (!cmdinfo->data_out_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_DATA_OUT_URB; } 
@@ -604,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->data_out_urb); uas_log_cmd_state(cmnd, "data out submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->state &= ~SUBMIT_DATA_OUT_URB; cmdinfo->state |= DATA_OUT_URB_INFLIGHT; @@ -613,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (cmdinfo->state & ALLOC_CMD_URB) { cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd); if (!cmdinfo->cmd_urb) - return SCSI_MLQUEUE_DEVICE_BUSY; + return -ENOMEM; cmdinfo->state &= ~ALLOC_CMD_URB; } @@ -623,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, if (err) { usb_unanchor_urb(cmdinfo->cmd_urb); uas_log_cmd_state(cmnd, "cmd submit err", err); - return SCSI_MLQUEUE_DEVICE_BUSY; + return err; } cmdinfo->cmd_urb = NULL; cmdinfo->state &= ~SUBMIT_CMD_URB; @@ -638,12 +634,10 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, { struct scsi_device *sdev = cmnd->device; struct uas_dev_info *devinfo = sdev->hostdata; - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); unsigned long flags; int idx, err; - BUILD_BUG_ON(sizeof(struct uas_cmd_info) > sizeof(struct scsi_pointer)); - /* Re-check scsi_block_requests now that we've the host-lock */ if (cmnd->device->host->host_self_blocked) return SCSI_MLQUEUE_DEVICE_BUSY; @@ -653,7 +647,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, memcpy(cmnd->sense_buffer, usb_stor_sense_invalidCDB, sizeof(usb_stor_sense_invalidCDB)); cmnd->result = SAM_STAT_CHECK_CONDITION; - cmnd->scsi_done(cmnd); + scsi_done(cmnd); return 0; } @@ -661,7 +655,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, if (devinfo->resetting) { set_host_byte(cmnd, DID_ERROR); - cmnd->scsi_done(cmnd); + scsi_done(cmnd); goto zombie; } @@ -675,8 +669,6 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, return SCSI_MLQUEUE_DEVICE_BUSY; } - cmnd->scsi_done = done; - 
memset(cmdinfo, 0, sizeof(*cmdinfo)); cmdinfo->uas_tag = idx + 1; /* uas-tag == usb-stream-id, so 1 based */ cmdinfo->state = SUBMIT_STATUS_URB | ALLOC_CMD_URB | SUBMIT_CMD_URB; @@ -705,8 +697,8 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, * of queueing, no matter how fatal the error */ if (err == -ENODEV) { - set_host_byte(cmnd, DID_ERROR); - cmnd->scsi_done(cmnd); + set_host_byte(cmnd, DID_NO_CONNECT); + scsi_done(cmnd); goto zombie; } if (err) { @@ -715,7 +707,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, spin_unlock_irqrestore(&devinfo->lock, flags); return SCSI_MLQUEUE_DEVICE_BUSY; } - uas_add_work(cmdinfo); + uas_add_work(cmnd); } devinfo->cmnd[idx] = cmnd; @@ -733,7 +725,7 @@ static DEF_SCSI_QCMD(uas_queuecommand) */ static int uas_eh_abort_handler(struct scsi_cmnd *cmnd) { - struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd); struct uas_dev_info *devinfo = (void *)cmnd->device->hostdata; struct urb *data_in_urb = NULL; struct urb *data_out_urb = NULL; @@ -913,6 +905,7 @@ static struct scsi_host_template uas_host_template = { .this_id = -1, .skip_settle_delay = 1, .dma_boundary = PAGE_SIZE - 1, + .cmd_size = sizeof(struct uas_cmd_info), }; #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 90aa9c12ff..8b543f2c98 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -388,7 +388,7 @@ static int usb_stor_control_thread(void * __us) if (srb->result == DID_ABORT << 16) { SkipForAbort: usb_stor_dbg(us, "scsi command aborted\n"); - srb = NULL; /* Don't call srb->scsi_done() */ + srb = NULL; /* Don't call scsi_done() */ } /* @@ -417,7 +417,7 @@ static int usb_stor_control_thread(void * __us) if (srb) { usb_stor_dbg(us, "scsi cmd done, result=0x%x\n", srb->result); - srb->scsi_done(srb); + scsi_done(srb); } } /* for (;;) */ diff --git a/drivers/usb/typec/tcpm/tcpci.c 
b/drivers/usb/typec/tcpm/tcpci.c index a7b0134d38..e047a15e67 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -899,6 +899,7 @@ MODULE_DEVICE_TABLE(i2c, tcpci_id); #ifdef CONFIG_OF static const struct of_device_id tcpci_of_match[] = { { .compatible = "nxp,ptn5110", }, + { .compatible = "tcpci", }, {}, }; MODULE_DEVICE_TABLE(of, tcpci_of_match); diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 73cd5bf350..2431febc46 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -275,8 +275,6 @@ static void ucsi_displayport_work(struct work_struct *work) struct ucsi_dp *dp = container_of(work, struct ucsi_dp, work); int ret; - mutex_lock(&dp->con->lock); - ret = typec_altmode_vdm(dp->alt, dp->header, dp->vdo_data, dp->vdo_size); if (ret) @@ -285,8 +283,6 @@ static void ucsi_displayport_work(struct work_struct *work) dp->vdo_data = NULL; dp->vdo_size = 0; dp->header = 0; - - mutex_unlock(&dp->con->lock); } void ucsi_displayport_remove_partner(struct typec_altmode *alt) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dca6803a75..cb6458ec04 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -138,8 +138,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; - if (cci & UCSI_CCI_NOT_SUPPORTED) + if (cci & UCSI_CCI_NOT_SUPPORTED) { + if (ucsi_acknowledge_command(ucsi) < 0) + dev_err(ucsi->dev, + "ACK of unsupported command failed\n"); return -EOPNOTSUPP; + } if (cci & UCSI_CCI_ERROR) { if (cmd == UCSI_GET_ERROR_STATUS) @@ -850,7 +854,7 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num) struct ucsi_connector *con = &ucsi->connector[num - 1]; if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) { - dev_dbg(ucsi->dev, "Bogus connector change event\n"); + dev_dbg(ucsi->dev, "Early connector change event\n"); return; } @@ -875,13 
+879,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard) static int ucsi_reset_ppm(struct ucsi *ucsi) { - u64 command = UCSI_PPM_RESET; + u64 command; unsigned long tmo; u32 cci; int ret; mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + if (ret < 0) + goto out; + + /* + * If UCSI_CCI_RESET_COMPLETE is already set we must clear + * the flag before we start another reset. Send a + * UCSI_SET_NOTIFICATION_ENABLE command to achieve this. + * Ignore a timeout and try the reset anyway if this fails. + */ + if (cci & UCSI_CCI_RESET_COMPLETE) { + command = UCSI_SET_NOTIFICATION_ENABLE; + ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, + sizeof(command)); + if (ret < 0) + goto out; + + tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS); + do { + ret = ucsi->ops->read(ucsi, UCSI_CCI, + &cci, sizeof(cci)); + if (ret < 0) + goto out; + if (cci & UCSI_CCI_COMMAND_COMPLETE) + break; + if (time_is_before_jiffies(tmo)) + break; + msleep(20); + } while (1); + + WARN_ON(cci & UCSI_CCI_RESET_COMPLETE); + } + + command = UCSI_PPM_RESET; ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command, sizeof(command)); if (ret < 0) @@ -1203,6 +1241,7 @@ static int ucsi_init(struct ucsi *ucsi) { struct ucsi_connector *con; u64 command, ntfy; + u32 cci; int ret; int i; @@ -1254,6 +1293,15 @@ static int ucsi_init(struct ucsi *ucsi) goto err_unregister; ucsi->ntfy = ntfy; + + mutex_lock(&ucsi->ppm_lock); + ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci)); + mutex_unlock(&ucsi->ppm_lock); + if (ret) + return ret; + if (UCSI_CCI_CONNECTOR(cci)) + ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci)); + return 0; err_unregister: diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index cee6667909..3dc3da8dbb 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -220,12 +220,12 @@ struct ucsi_cable_property { #define UCSI_CABLE_PROP_FLAG_VBUS_IN_CABLE BIT(0) #define 
UCSI_CABLE_PROP_FLAG_ACTIVE_CABLE BIT(1) #define UCSI_CABLE_PROP_FLAG_DIRECTIONALITY BIT(2) -#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) ((_f_) & GENMASK(3, 0)) +#define UCSI_CABLE_PROP_FLAG_PLUG_TYPE(_f_) (((_f_) & GENMASK(4, 3)) >> 3) #define UCSI_CABLE_PROPERTY_PLUG_TYPE_A 0 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_B 1 #define UCSI_CABLE_PROPERTY_PLUG_TYPE_C 2 #define UCSI_CABLE_PROPERTY_PLUG_OTHER 3 -#define UCSI_CABLE_PROP_MODE_SUPPORT BIT(5) +#define UCSI_CABLE_PROP_FLAG_MODE_SUPPORT BIT(5) u8 latency; } __packed; diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 77e584093a..50c2f82f2b 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -142,13 +142,14 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, irq = &vdev->mc_irqs[index]; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_fsl_mc_irq_handler(hwirq, irq); + if (irq->trigger) + eventfd_signal(irq->trigger, 1); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { u8 trigger = *(u8 *)data; - if (trigger) - vfio_fsl_mc_irq_handler(hwirq, irq); + if (trigger && irq->trigger) + eventfd_signal(irq->trigger, 1); } return 0; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 6069a11fb5..cb55f96b4f 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -29,15 +29,22 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) { struct vfio_pci_core_device *vdev = opaque; - if (likely(is_intx(vdev) && !vdev->virq_disabled)) - eventfd_signal(vdev->ctx[0].trigger, 1); + if (likely(is_intx(vdev) && !vdev->virq_disabled)) { + struct eventfd_ctx *trigger; + + trigger = READ_ONCE(vdev->ctx[0].trigger); + if (likely(trigger)) + eventfd_signal(trigger, 1); + } } -void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +static void __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; unsigned long 
flags; + lockdep_assert_held(&vdev->igate); + spin_lock_irqsave(&vdev->irqlock, flags); /* @@ -65,6 +72,13 @@ void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) spin_unlock_irqrestore(&vdev->irqlock, flags); } +void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +{ + mutex_lock(&vdev->igate); + __vfio_pci_intx_mask(vdev); + mutex_unlock(&vdev->igate); +} + /* * If this is triggered by an eventfd, we can't call eventfd_signal * or else we'll deadlock on the eventfd wait queue. Return >0 when @@ -107,12 +121,21 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) return ret; } -void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) { + lockdep_assert_held(&vdev->igate); + if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) vfio_send_intx_eventfd(vdev, NULL); } +void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) +{ + mutex_lock(&vdev->igate); + __vfio_pci_intx_unmask(vdev); + mutex_unlock(&vdev->igate); +} + static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { struct vfio_pci_core_device *vdev = dev_id; @@ -139,95 +162,104 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) return ret; } -static int vfio_intx_enable(struct vfio_pci_core_device *vdev) +static int vfio_intx_enable(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) { + struct pci_dev *pdev = vdev->pdev; + unsigned long irqflags; + char *name; + int ret; + if (!is_irq_none(vdev)) return -EINVAL; - if (!vdev->pdev->irq) + if (!pdev->irq) return -ENODEV; + name = kasprintf(GFP_KERNEL, "vfio-intx(%s)", pci_name(pdev)); + if (!name) + return -ENOMEM; + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); if (!vdev->ctx) return -ENOMEM; vdev->num_ctx = 1; + vdev->ctx[0].name = name; + vdev->ctx[0].trigger = trigger; + /* - * If the virtual interrupt is masked, restore it. 
Devices - * supporting DisINTx can be masked at the hardware level - * here, non-PCI-2.3 devices will have to wait until the - * interrupt is enabled. + * Fill the initial masked state based on virq_disabled. After + * enable, changing the DisINTx bit in vconfig directly changes INTx + * masking. igate prevents races during setup, once running masked + * is protected via irqlock. + * + * Devices supporting DisINTx also reflect the current mask state in + * the physical DisINTx bit, which is not affected during IRQ setup. + * + * Devices without DisINTx support require an exclusive interrupt. + * IRQ masking is performed at the IRQ chip. Again, igate protects + * against races during setup and IRQ handlers and irqfds are not + * yet active, therefore masked is stable and can be used to + * conditionally auto-enable the IRQ. + * + * irq_type must be stable while the IRQ handler is registered, + * therefore it must be set before request_irq(). */ vdev->ctx[0].masked = vdev->virq_disabled; - if (vdev->pci_2_3) - pci_intx(vdev->pdev, !vdev->ctx[0].masked); + if (vdev->pci_2_3) { + pci_intx(pdev, !vdev->ctx[0].masked); + irqflags = IRQF_SHARED; + } else { + irqflags = vdev->ctx[0].masked ? 
IRQF_NO_AUTOEN : 0; + } vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; + ret = request_irq(pdev->irq, vfio_intx_handler, + irqflags, vdev->ctx[0].name, vdev); + if (ret) { + vdev->irq_type = VFIO_PCI_NUM_IRQS; + kfree(name); + vdev->num_ctx = 0; + kfree(vdev->ctx); + return ret; + } + return 0; } -static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) +static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, + struct eventfd_ctx *trigger) { struct pci_dev *pdev = vdev->pdev; - unsigned long irqflags = IRQF_SHARED; - struct eventfd_ctx *trigger; - unsigned long flags; - int ret; + struct eventfd_ctx *old; - if (vdev->ctx[0].trigger) { - free_irq(pdev->irq, vdev); - kfree(vdev->ctx[0].name); - eventfd_ctx_put(vdev->ctx[0].trigger); - vdev->ctx[0].trigger = NULL; - } - - if (fd < 0) /* Disable only */ - return 0; - - vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)", - pci_name(pdev)); - if (!vdev->ctx[0].name) - return -ENOMEM; - - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(vdev->ctx[0].name); - return PTR_ERR(trigger); - } - - vdev->ctx[0].trigger = trigger; + old = vdev->ctx[0].trigger; - if (!vdev->pci_2_3) - irqflags = 0; + WRITE_ONCE(vdev->ctx[0].trigger, trigger); - ret = request_irq(pdev->irq, vfio_intx_handler, - irqflags, vdev->ctx[0].name, vdev); - if (ret) { - vdev->ctx[0].trigger = NULL; - kfree(vdev->ctx[0].name); - eventfd_ctx_put(trigger); - return ret; + /* Releasing an old ctx requires synchronizing in-flight users */ + if (old) { + synchronize_irq(pdev->irq); + vfio_virqfd_flush_thread(&vdev->ctx[0].unmask); + eventfd_ctx_put(old); } - /* - * INTx disable will stick across the new irq setup, - * disable_irq won't. 
- */ - spin_lock_irqsave(&vdev->irqlock, flags); - if (!vdev->pci_2_3 && vdev->ctx[0].masked) - disable_irq_nosync(pdev->irq); - spin_unlock_irqrestore(&vdev->irqlock, flags); - return 0; } static void vfio_intx_disable(struct vfio_pci_core_device *vdev) { + struct pci_dev *pdev = vdev->pdev; + vfio_virqfd_disable(&vdev->ctx[0].unmask); vfio_virqfd_disable(&vdev->ctx[0].mask); - vfio_intx_set_signal(vdev, -1); + free_irq(pdev->irq, vdev); + if (vdev->ctx[0].trigger) + eventfd_ctx_put(vdev->ctx[0].trigger); + kfree(vdev->ctx[0].name); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -425,11 +457,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t unmask = *(uint8_t *)data; if (unmask) - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) @@ -452,11 +484,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t mask = *(uint8_t *)data; if (mask) - vfio_pci_intx_mask(vdev); + __vfio_pci_intx_mask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { return -ENOTTY; /* XXX implement me */ } @@ -477,19 +509,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + struct eventfd_ctx *trigger = NULL; int32_t fd = *(int32_t *)data; int ret; - if (is_intx(vdev)) - return vfio_intx_set_signal(vdev, fd); + if (fd >= 0) { + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) + return PTR_ERR(trigger); + } - ret = vfio_intx_enable(vdev); - if (ret) - return ret; + if (is_intx(vdev)) + ret = 
vfio_intx_set_signal(vdev, trigger); + else + ret = vfio_intx_enable(vdev, trigger); - ret = vfio_intx_set_signal(vdev, fd); - if (ret) - vfio_intx_disable(vdev); + if (ret && trigger) + eventfd_ctx_put(trigger); return ret; } diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index c5b09ec0a3..7f4341a8d7 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, return 0; } +/* + * The trigger eventfd is guaranteed valid in the interrupt path + * and protected by the igate mutex when triggered via ioctl. + */ +static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx) +{ + if (likely(irq_ctx->trigger)) + eventfd_signal(irq_ctx->trigger, 1); +} + static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; @@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) spin_unlock_irqrestore(&irq_ctx->lock, flags); if (ret == IRQ_HANDLED) - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return ret; } @@ -164,22 +174,19 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; - eventfd_signal(irq_ctx->trigger, 1); + vfio_send_eventfd(irq_ctx); return IRQ_HANDLED; } static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, - int fd, irq_handler_t handler) + int fd) { struct vfio_platform_irq *irq = &vdev->irqs[index]; struct eventfd_ctx *trigger; - int ret; if (irq->trigger) { - irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); - free_irq(irq->hwirq, irq); - kfree(irq->name); + disable_irq(irq->hwirq); eventfd_ctx_put(irq->trigger); irq->trigger = NULL; } @@ -187,30 +194,20 @@ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, if (fd < 0) /* Disable only */ return 0; - irq->name = 
kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", - irq->hwirq, vdev->name); - if (!irq->name) - return -ENOMEM; - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { - kfree(irq->name); + if (IS_ERR(trigger)) return PTR_ERR(trigger); - } irq->trigger = trigger; - irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); - ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); - if (ret) { - kfree(irq->name); - eventfd_ctx_put(trigger); - irq->trigger = NULL; - return ret; - } - - if (!irq->masked) - enable_irq(irq->hwirq); + /* + * irq->masked effectively provides nested disables within the overall + * enable relative to trigger. Specifically request_irq() is called + * with NO_AUTOEN, therefore the IRQ is initially disabled. The user + * may only further disable the IRQ with a MASK operations because + * irq->masked is initially false. + */ + enable_irq(irq->hwirq); return 0; } @@ -229,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, handler = vfio_irq_handler; if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) - return vfio_set_trigger(vdev, index, -1, handler); + return vfio_set_trigger(vdev, index, -1); if (start != 0 || count != 1) return -EINVAL; @@ -237,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; - return vfio_set_trigger(vdev, index, fd, handler); + return vfio_set_trigger(vdev, index, fd); } if (flags & VFIO_IRQ_SET_DATA_NONE) { @@ -261,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, unsigned start, unsigned count, uint32_t flags, void *data) = NULL; + /* + * For compatibility, errors from request_irq() are local to the + * SET_IRQS path and reflected in the name pointer. This allows, + * for example, polling mode fallback for an exclusive IRQ failure. 
+ */ + if (IS_ERR(vdev->irqs[index].name)) + return PTR_ERR(vdev->irqs[index].name); + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_MASK: func = vfio_platform_set_irq_mask; @@ -281,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, int vfio_platform_irq_init(struct vfio_platform_device *vdev) { - int cnt = 0, i; + int cnt = 0, i, ret = 0; while (vdev->get_irq(vdev, cnt) >= 0) cnt++; @@ -292,37 +297,70 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) for (i = 0; i < cnt; i++) { int hwirq = vdev->get_irq(vdev, i); + irq_handler_t handler = vfio_irq_handler; - if (hwirq < 0) + if (hwirq < 0) { + ret = -EINVAL; goto err; + } spin_lock_init(&vdev->irqs[i].lock); vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; - if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) { vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED; + handler = vfio_automasked_irq_handler; + } vdev->irqs[i].count = 1; vdev->irqs[i].hwirq = hwirq; vdev->irqs[i].masked = false; + vdev->irqs[i].name = kasprintf(GFP_KERNEL, + "vfio-irq[%d](%s)", hwirq, + vdev->name); + if (!vdev->irqs[i].name) { + ret = -ENOMEM; + goto err; + } + + ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, + vdev->irqs[i].name, &vdev->irqs[i]); + if (ret) { + kfree(vdev->irqs[i].name); + vdev->irqs[i].name = ERR_PTR(ret); + } } vdev->num_irqs = cnt; return 0; err: + for (--i; i >= 0; i--) { + if (!IS_ERR(vdev->irqs[i].name)) { + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + kfree(vdev->irqs[i].name); + } + } kfree(vdev->irqs); - return -EINVAL; + return ret; } void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) { int i; - for (i = 0; i < vdev->num_irqs; i++) - vfio_set_trigger(vdev, i, -1, NULL); + for (i = 0; i < vdev->num_irqs; i++) { + vfio_virqfd_disable(&vdev->irqs[i].mask); + vfio_virqfd_disable(&vdev->irqs[i].unmask); + if (!IS_ERR(vdev->irqs[i].name)) { + 
free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); + if (vdev->irqs[i].trigger) + eventfd_ctx_put(vdev->irqs[i].trigger); + kfree(vdev->irqs[i].name); + } + } vdev->num_irqs = 0; kfree(vdev->irqs); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 414e98d82b..b58ba030e7 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -104,6 +104,13 @@ static void virqfd_inject(struct work_struct *work) virqfd->thread(virqfd->opaque, virqfd->data); } +static void virqfd_flush_inject(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); + + flush_work(&virqfd->inject); +} + int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), @@ -127,6 +134,7 @@ int vfio_virqfd_enable(void *opaque, INIT_WORK(&virqfd->shutdown, virqfd_shutdown); INIT_WORK(&virqfd->inject, virqfd_inject); + INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); irqfd = fdget(fd); if (!irqfd.file) { @@ -217,6 +225,19 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd) } EXPORT_SYMBOL_GPL(vfio_virqfd_disable); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) +{ + unsigned long flags; + + spin_lock_irqsave(&virqfd_lock, flags); + if (*pvirqfd && (*pvirqfd)->thread) + queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject); + spin_unlock_irqrestore(&virqfd_lock, flags); + + flush_workqueue(vfio_irqfd_cleanup_wq); +} +EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); + module_init(vfio_virqfd_init); module_exit(vfio_virqfd_exit); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 99cdd59f4e..061af5dc92 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2518,9 +2518,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) r = vhost_get_avail_idx(vq, &avail_idx); if (unlikely(r)) return false; + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + 
* call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return false; + } - return vq->avail_idx == vq->last_avail_idx; + return true; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 662524574c..26dfc4e5b1 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -2016,8 +2016,8 @@ config FB_COBALT depends on FB && MIPS_COBALT config FB_SH7760 - bool "SH7760/SH7763/SH7720/SH7721 LCDC support" - depends on FB=y && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763 \ + tristate "SH7760/SH7763/SH7720/SH7721 LCDC support" + depends on FB && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763 \ || CPU_SUBTYPE_SH7720 || CPU_SUBTYPE_SH7721) select FB_CFB_FILLRECT select FB_CFB_COPYAREA diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 1f12c20436..c2a0a936d5 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -149,7 +149,7 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf) unsigned long offset; vm_fault_t ret; - offset = (vmf->address - vmf->vma->vm_start); + offset = vmf->pgoff << PAGE_SHIFT; /* this is a callback we get when userspace first tries to write to the page. we schedule a workqueue. 
that workqueue diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index b0e690f410..9ca99da3a5 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -1311,7 +1311,7 @@ int fb_get_mode(int flags, u32 val, struct fb_var_screeninfo *var, struct fb_inf int fb_videomode_from_videomode(const struct videomode *vm, struct fb_videomode *fbmode) { - unsigned int htotal, vtotal; + unsigned int htotal, vtotal, total; fbmode->xres = vm->hactive; fbmode->left_margin = vm->hback_porch; @@ -1344,8 +1344,9 @@ int fb_videomode_from_videomode(const struct videomode *vm, vtotal = vm->vactive + vm->vfront_porch + vm->vback_porch + vm->vsync_len; /* prevent division by zero */ - if (htotal && vtotal) { - fbmode->refresh = vm->pixelclock / (htotal * vtotal); + total = htotal * vtotal; + if (total) { + fbmode->refresh = vm->pixelclock / total; /* a mode must have htotal and vtotal != 0 or it is invalid */ } else { fbmode->refresh = 0; diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index 94ebd8af50..224d7c8146 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -2271,7 +2271,10 @@ static int savagefb_probe(struct pci_dev *dev, const struct pci_device_id *id) if (info->var.xres_virtual > 0x1000) info->var.xres_virtual = 0x1000; #endif - savagefb_check_var(&info->var, info); + err = savagefb_check_var(&info->var, info); + if (err) + goto failed; + savagefb_set_fix(info); /* diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c index e33c016c54..74a013c398 100644 --- a/drivers/video/fbdev/sh_mobile_lcdcfb.c +++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c @@ -1577,7 +1577,7 @@ sh_mobile_lcdc_overlay_fb_init(struct sh_mobile_lcdc_overlay *ovl) */ info->fix = sh_mobile_lcdc_overlay_fix; snprintf(info->fix.id, sizeof(info->fix.id), - "SH Mobile LCDC Overlay %u", ovl->index); + 
"SHMobile ovl %u", ovl->index); info->fix.smem_start = ovl->dma_handle; info->fix.smem_len = ovl->fb_size; info->fix.line_length = ovl->pitch; diff --git a/drivers/video/fbdev/sis/init301.c b/drivers/video/fbdev/sis/init301.c index a8fb41f1a2..0932907200 100644 --- a/drivers/video/fbdev/sis/init301.c +++ b/drivers/video/fbdev/sis/init301.c @@ -172,7 +172,7 @@ static const unsigned char SiS_HiTVGroup3_2[] = { }; /* 301C / 302ELV extended Part2 TV registers (4 tap scaler) */ - +#ifdef CONFIG_FB_SIS_315 static const unsigned char SiS_Part2CLVX_1[] = { 0x00,0x00, 0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E, @@ -245,7 +245,6 @@ static const unsigned char SiS_Part2CLVX_6[] = { /* 1080i */ 0xFF,0xFF, }; -#ifdef CONFIG_FB_SIS_315 /* 661 et al LCD data structure (2.03.00) */ static const unsigned char SiS_LCDStruct661[] = { /* 1024x768 */ diff --git a/drivers/video/fbdev/via/accel.c b/drivers/video/fbdev/via/accel.c index 0a1bc7a4d7..1e04026f08 100644 --- a/drivers/video/fbdev/via/accel.c +++ b/drivers/video/fbdev/via/accel.c @@ -115,7 +115,7 @@ static int hw_bitblt_1(void __iomem *engine, u8 op, u32 width, u32 height, if (op != VIA_BITBLT_FILL) { tmp = src_mem ? 0 : src_addr; - if (dst_addr & 0xE0000007) { + if (tmp & 0xE0000007) { printk(KERN_WARNING "hw_bitblt_1: Unsupported source " "address %X\n", tmp); return -EINVAL; @@ -260,7 +260,7 @@ static int hw_bitblt_2(void __iomem *engine, u8 op, u32 width, u32 height, writel(tmp, engine + 0x18); tmp = src_mem ? 0 : src_addr; - if (dst_addr & 0xE0000007) { + if (tmp & 0xE0000007) { printk(KERN_WARNING "hw_bitblt_2: Unsupported source " "address %X\n", tmp); return -EINVAL; diff --git a/drivers/virt/acrn/acrn_drv.h b/drivers/virt/acrn/acrn_drv.h index 1be54efa66..5663c17ad3 100644 --- a/drivers/virt/acrn/acrn_drv.h +++ b/drivers/virt/acrn/acrn_drv.h @@ -48,6 +48,7 @@ struct vm_memory_region_op { * @reserved: Reserved. * @regions_num: The number of vm_memory_region_op. 
* @regions_gpa: Physical address of a vm_memory_region_op array. + * @regions_op: Flexible array of vm_memory_region_op. * * HC_VM_SET_MEMORY_REGIONS uses this structure to manage EPT mappings of * multiple memory regions of a User VM. A &struct vm_memory_region_batch @@ -55,10 +56,11 @@ struct vm_memory_region_op { * ACRN Hypervisor. */ struct vm_memory_region_batch { - u16 vmid; - u16 reserved[3]; - u32 regions_num; - u64 regions_gpa; + u16 vmid; + u16 reserved[3]; + u32 regions_num; + u64 regions_gpa; + struct vm_memory_region_op regions_op[]; }; /** diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c index 3b1b1e7a84..8ef49d7be4 100644 --- a/drivers/virt/acrn/mm.c +++ b/drivers/virt/acrn/mm.c @@ -155,44 +155,84 @@ int acrn_vm_memseg_unmap(struct acrn_vm *vm, struct acrn_vm_memmap *memmap) int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap) { struct vm_memory_region_batch *regions_info; - int nr_pages, i = 0, order, nr_regions = 0; + int nr_pages, i, order, nr_regions = 0; struct vm_memory_mapping *region_mapping; struct vm_memory_region_op *vm_region; struct page **pages = NULL, *page; void *remap_vaddr; int ret, pinned; u64 user_vm_pa; - unsigned long pfn; struct vm_area_struct *vma; if (!vm || !memmap) return -EINVAL; + /* Get the page number of the map region */ + nr_pages = memmap->len >> PAGE_SHIFT; + if (!nr_pages) + return -EINVAL; + mmap_read_lock(current->mm); vma = vma_lookup(current->mm, memmap->vma_base); if (vma && ((vma->vm_flags & VM_PFNMAP) != 0)) { + unsigned long start_pfn, cur_pfn; + spinlock_t *ptl; + bool writable; + pte_t *ptep; + if ((memmap->vma_base + memmap->len) > vma->vm_end) { mmap_read_unlock(current->mm); return -EINVAL; } - ret = follow_pfn(vma, memmap->vma_base, &pfn); + for (i = 0; i < nr_pages; i++) { + ret = follow_pte(vma->vm_mm, + memmap->vma_base + i * PAGE_SIZE, + &ptep, &ptl); + if (ret) + break; + + cur_pfn = pte_pfn(ptep_get(ptep)); + if (i == 0) + start_pfn = cur_pfn; + writable = 
!!pte_write(ptep_get(ptep)); + pte_unmap_unlock(ptep, ptl); + + /* Disallow write access if the PTE is not writable. */ + if (!writable && + (memmap->attr & ACRN_MEM_ACCESS_WRITE)) { + ret = -EFAULT; + break; + } + + /* Disallow refcounted pages. */ + if (pfn_valid(cur_pfn) && + !PageReserved(pfn_to_page(cur_pfn))) { + ret = -EFAULT; + break; + } + + /* Disallow non-contiguous ranges. */ + if (cur_pfn != start_pfn + i) { + ret = -EINVAL; + break; + } + } mmap_read_unlock(current->mm); - if (ret < 0) { + + if (ret) { dev_dbg(acrn_dev.this_device, "Failed to lookup PFN at VMA:%pK.\n", (void *)memmap->vma_base); return ret; } return acrn_mm_region_add(vm, memmap->user_vm_pa, - PFN_PHYS(pfn), memmap->len, + PFN_PHYS(start_pfn), memmap->len, ACRN_MEM_TYPE_WB, memmap->attr); } mmap_read_unlock(current->mm); - /* Get the page number of the map region */ - nr_pages = memmap->len >> PAGE_SHIFT; - pages = vzalloc(nr_pages * sizeof(struct page *)); + pages = vzalloc(array_size(nr_pages, sizeof(*pages))); if (!pages) return -ENOMEM; @@ -235,31 +275,28 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap) mutex_unlock(&vm->regions_mapping_lock); /* Calculate count of vm_memory_region_op */ - while (i < nr_pages) { + for (i = 0; i < nr_pages; i += 1 << order) { page = pages[i]; VM_BUG_ON_PAGE(PageTail(page), page); order = compound_order(page); nr_regions++; - i += 1 << order; } /* Prepare the vm_memory_region_batch */ - regions_info = kzalloc(sizeof(*regions_info) + - sizeof(*vm_region) * nr_regions, - GFP_KERNEL); + regions_info = kzalloc(struct_size(regions_info, regions_op, + nr_regions), GFP_KERNEL); if (!regions_info) { ret = -ENOMEM; goto unmap_kernel_map; } /* Fill each vm_memory_region_op */ - vm_region = (struct vm_memory_region_op *)(regions_info + 1); + vm_region = regions_info->regions_op; regions_info->vmid = vm->vmid; regions_info->regions_num = nr_regions; regions_info->regions_gpa = virt_to_phys(vm_region); user_vm_pa = memmap->user_vm_pa; - 
i = 0; - while (i < nr_pages) { + for (i = 0; i < nr_pages; i += 1 << order) { u32 region_size; page = pages[i]; @@ -275,7 +312,6 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap) vm_region++; user_vm_pa += region_size; - i += 1 << order; } /* Inform the ACRN Hypervisor to set up EPT mappings */ diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index c2b733ef95..2f8f3bd56b 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -455,13 +455,19 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device); int virtio_device_freeze(struct virtio_device *dev) { struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + int ret; virtio_config_disable(dev); dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; - if (drv && drv->freeze) - return drv->freeze(dev); + if (drv && drv->freeze) { + ret = drv->freeze(dev); + if (ret) { + virtio_config_enable(dev); + return ret; + } + } return 0; } diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 1e890ef176..a6f375417f 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -339,8 +339,10 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); - if (err) + if (err) { + vp_del_vq(vqs[i]); goto error_find; + } } return 0; diff --git a/drivers/watchdog/bd9576_wdt.c b/drivers/watchdog/bd9576_wdt.c index 0b6999f3b6..f00ea1b4e4 100644 --- a/drivers/watchdog/bd9576_wdt.c +++ b/drivers/watchdog/bd9576_wdt.c @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include #include #include @@ -29,7 +29,6 @@ struct bd9576_wdt_priv { struct gpio_desc *gpiod_en; struct device *dev; struct regmap *regmap; - bool always_running; struct watchdog_device wdd; }; @@ -62,10 +61,7 @@ static int bd9576_wdt_stop(struct watchdog_device *wdd) { struct bd9576_wdt_priv *priv = watchdog_get_drvdata(wdd); - if (!priv->always_running) - 
bd9576_wdt_disable(priv); - else - set_bit(WDOG_HW_RUNNING, &wdd->status); + bd9576_wdt_disable(priv); return 0; } @@ -202,10 +198,10 @@ static int bd957x_set_wdt_mode(struct bd9576_wdt_priv *priv, int hw_margin, static int bd9576_wdt_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->parent->of_node; struct bd9576_wdt_priv *priv; u32 hw_margin[2]; u32 hw_margin_max = BD957X_WDT_DEFAULT_MARGIN, hw_margin_min = 0; + int count; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -221,41 +217,49 @@ static int bd9576_wdt_probe(struct platform_device *pdev) return -ENODEV; } - priv->gpiod_en = devm_gpiod_get_from_of_node(dev, dev->parent->of_node, - "rohm,watchdog-enable-gpios", - 0, GPIOD_OUT_LOW, - "watchdog-enable"); + priv->gpiod_en = devm_fwnode_gpiod_get(dev, dev_fwnode(dev->parent), + "rohm,watchdog-enable", + GPIOD_OUT_LOW, + "watchdog-enable"); if (IS_ERR(priv->gpiod_en)) return dev_err_probe(dev, PTR_ERR(priv->gpiod_en), "getting watchdog-enable GPIO failed\n"); - priv->gpiod_ping = devm_gpiod_get_from_of_node(dev, dev->parent->of_node, - "rohm,watchdog-ping-gpios", - 0, GPIOD_OUT_LOW, - "watchdog-ping"); + priv->gpiod_ping = devm_fwnode_gpiod_get(dev, dev_fwnode(dev->parent), + "rohm,watchdog-ping", + GPIOD_OUT_LOW, + "watchdog-ping"); if (IS_ERR(priv->gpiod_ping)) return dev_err_probe(dev, PTR_ERR(priv->gpiod_ping), "getting watchdog-ping GPIO failed\n"); - ret = of_property_read_variable_u32_array(np, "rohm,hw-timeout-ms", - &hw_margin[0], 1, 2); - if (ret < 0 && ret != -EINVAL) - return ret; + count = device_property_count_u32(dev->parent, "rohm,hw-timeout-ms"); + if (count < 0 && count != -EINVAL) + return count; + + if (count > 0) { + if (count > ARRAY_SIZE(hw_margin)) + return -EINVAL; + + ret = device_property_read_u32_array(dev->parent, + "rohm,hw-timeout-ms", + hw_margin, count); + if (ret < 0) + return ret; - if (ret == 1) - hw_margin_max = hw_margin[0]; + if (count == 1) + 
hw_margin_max = hw_margin[0]; - if (ret == 2) { - hw_margin_max = hw_margin[1]; - hw_margin_min = hw_margin[0]; + if (count == 2) { + hw_margin_max = hw_margin[1]; + hw_margin_min = hw_margin[0]; + } } ret = bd957x_set_wdt_mode(priv, hw_margin_max, hw_margin_min); if (ret) return ret; - priv->always_running = of_property_read_bool(np, "always-running"); - watchdog_set_drvdata(&priv->wdd, priv); priv->wdd.info = &bd957x_wdt_ident; @@ -270,9 +274,6 @@ static int bd9576_wdt_probe(struct platform_device *pdev) watchdog_stop_on_reboot(&priv->wdd); - if (priv->always_running) - bd9576_wdt_start(&priv->wdd); - return devm_watchdog_register_device(dev, &priv->wdd); } diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index daa00f3c5a..7f2ca611a3 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -52,6 +52,8 @@ #define DWDST BIT(1) +#define MAX_HW_ERROR 250 + static int heartbeat = DEFAULT_HEARTBEAT; /* @@ -90,7 +92,7 @@ static int rti_wdt_start(struct watchdog_device *wdd) * to be 50% or less than that; we obviouly want to configure the open * window as large as possible so we select the 50% option. */ - wdd->min_hw_heartbeat_ms = 500 * wdd->timeout; + wdd->min_hw_heartbeat_ms = 520 * wdd->timeout + MAX_HW_ERROR; /* Generate NMI when wdt expires */ writel_relaxed(RTIWWDRX_NMI, wdt->base + RTIWWDRXCTRL); @@ -124,31 +126,33 @@ static int rti_wdt_setup_hw_hb(struct watchdog_device *wdd, u32 wsize) * be petted during the open window; not too early or not too late. * The HW configuration options only allow for the open window size * to be 50% or less than that. + * To avoid any glitches, we accommodate 2% + max hardware error + * safety margin. 
*/ switch (wsize) { case RTIWWDSIZE_50P: - /* 50% open window => 50% min heartbeat */ - wdd->min_hw_heartbeat_ms = 500 * heartbeat; + /* 50% open window => 52% min heartbeat */ + wdd->min_hw_heartbeat_ms = 520 * heartbeat + MAX_HW_ERROR; break; case RTIWWDSIZE_25P: - /* 25% open window => 75% min heartbeat */ - wdd->min_hw_heartbeat_ms = 750 * heartbeat; + /* 25% open window => 77% min heartbeat */ + wdd->min_hw_heartbeat_ms = 770 * heartbeat + MAX_HW_ERROR; break; case RTIWWDSIZE_12P5: - /* 12.5% open window => 87.5% min heartbeat */ - wdd->min_hw_heartbeat_ms = 875 * heartbeat; + /* 12.5% open window => 89.5% min heartbeat */ + wdd->min_hw_heartbeat_ms = 895 * heartbeat + MAX_HW_ERROR; break; case RTIWWDSIZE_6P25: - /* 6.5% open window => 93.5% min heartbeat */ - wdd->min_hw_heartbeat_ms = 935 * heartbeat; + /* 6.5% open window => 95.5% min heartbeat */ + wdd->min_hw_heartbeat_ms = 955 * heartbeat + MAX_HW_ERROR; break; case RTIWWDSIZE_3P125: - /* 3.125% open window => 96.9% min heartbeat */ - wdd->min_hw_heartbeat_ms = 969 * heartbeat; + /* 3.125% open window => 98.9% min heartbeat */ + wdd->min_hw_heartbeat_ms = 989 * heartbeat + MAX_HW_ERROR; break; default: @@ -222,14 +226,6 @@ static int rti_wdt_probe(struct platform_device *pdev) return -EINVAL; } - /* - * If watchdog is running at 32k clock, it is not accurate. - * Adjust frequency down in this case so that we don't pet - * the watchdog too often. 
- */ - if (wdt->freq < 32768) - wdt->freq = wdt->freq * 9 / 10; - pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret < 0) { diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index ee691b20d4..04ff194fec 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -936,8 +936,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -981,8 +981,6 @@ static void __unbind_from_irq(unsigned int irq) unsigned int cpu = cpu_from_irq(irq); struct xenbus_device *dev; - xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; @@ -1000,6 +998,7 @@ static void __unbind_from_irq(unsigned int irq) } xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } xen_free_irq(irq); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 7437b185fa..0c84d41466 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -660,6 +660,7 @@ const struct file_operations v9fs_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, + .setlease = simple_nosetlease, }; const struct file_operations v9fs_file_operations_dotl = { @@ -701,4 +702,5 @@ const struct file_operations v9fs_mmap_file_operations_dotl = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, + .setlease = simple_nosetlease, }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0d9b7d453a..ef103ef392 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -87,7 +87,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses, int res; int mode = stat->mode; - res = mode & S_IALLUGO; + res = mode & 0777; /* S_IRWXUGO */ if (v9fs_proto_dotu(v9ses)) { if ((mode & P9_DMSETUID) == 
P9_DMSETUID) res |= S_ISUID; @@ -178,6 +178,9 @@ int v9fs_uflags2omode(int uflags, int extended) break; } + if (uflags & O_TRUNC) + ret |= P9_OTRUNC; + if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 7449f7fd47..51ac265398 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -340,6 +340,7 @@ static const struct super_operations v9fs_super_ops = { .alloc_inode = v9fs_alloc_inode, .free_inode = v9fs_free_inode, .statfs = simple_statfs, + .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, .show_options = v9fs_show_options, .umount_begin = v9fs_umount_begin, diff --git a/fs/Kconfig b/fs/Kconfig index 971339ecc1..9ea9614107 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -344,7 +344,7 @@ config LOCKD config LOCKD_V4 bool - depends on NFSD_V3 || NFS_V3 + depends on NFSD || NFS_V3 depends on FILE_LOCKING default y diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index bbb2c210d1..fa8a654314 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -146,6 +146,11 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) put_page(page); if (ret < 0) return ret; + + /* Don't cross a backup volume mountpoint from a backup volume */ + if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL && + ctx->type == AFSVL_BACKVOL) + return -ENODEV; } return 0; diff --git a/fs/aio.c b/fs/aio.c index e24eb82b2b..0f02ea1630 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -564,8 +564,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); - struct kioctx *ctx = req->ki_ctx; + struct aio_kiocb *req; + struct kioctx *ctx; unsigned long flags; /* @@ -575,9 +575,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) if (!(iocb->ki_flags & IOCB_AIO_RW)) return; + req = container_of(iocb, struct aio_kiocb, rw); + if 
(WARN_ON_ONCE(!list_empty(&req->ki_list))) return; + ctx = req->ki_ctx; + spin_lock_irqsave(&ctx->ctx_lock, flags); list_add_tail(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index cd9202867d..77676716c7 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2315,20 +2315,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) size_t alloc_bytes; alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); - data = kvmalloc(alloc_bytes, GFP_KERNEL); + data = kvzalloc(alloc_bytes, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - if (total_bytes >= sizeof(*data)) { + if (total_bytes >= sizeof(*data)) data->bytes_left = total_bytes - sizeof(*data); - data->bytes_missing = 0; - } else { + else data->bytes_missing = sizeof(*data) - total_bytes; - data->bytes_left = 0; - } - - data->elem_cnt = 0; - data->elem_missed = 0; return data; } diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 5a98c5da12..8d8b455992 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1046,6 +1046,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_record_root_in_trans(trans, node->root); + if (ret) + return ret; ret = btrfs_update_delayed_inode(trans, node->root, path, node); return ret; } diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index fab7eb76e5..58b0f04d71 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -161,8 +161,15 @@ struct dentry *btrfs_get_parent(struct dentry *child) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto fail; + if (ret == 0) { + /* + * Key with offset of -1 found, there would have to exist an + * inode with such number or a root with such id. 
+ */ + ret = -EUCLEAN; + goto fail; + } - BUG_ON(ret == 0); /* Key with offset of -1 found */ if (path->slots[0] == 0) { ret = -ENOENT; goto fail; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c7d8a18daa..07c6ab4ba0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2261,7 +2261,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode, */ if (*bits & EXTENT_CLEAR_META_RESV && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, len, false); + btrfs_delalloc_release_metadata(inode, len, true); /* For sanity tests. */ if (btrfs_is_testing(fs_info)) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 34278cb5f9..c50cabf694 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4080,6 +4080,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) BTRFS_QGROUP_RSV_META_PREALLOC); trace_qgroup_meta_convert(root, num_bytes); qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); + if (!sb_rdonly(fs_info->sb)) + add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); } /* diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0d1715ebde..6ffd34d39e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3972,7 +3972,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, gen = fs_info->last_trans_committed; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); + ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr); + if (ret == -ENOENT) + break; + + if (ret) { + spin_lock(&sctx->stat_lock); + sctx->stat.super_errors++; + spin_unlock(&sctx->stat_lock); + continue; + } + if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->commit_total_bytes) break; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9900f879fa..c2842e892e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -976,7 +976,15 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, ret = PTR_ERR(start); goto out; } - BUG_ON(start < p->buf); + if 
(unlikely(start < p->buf)) { + btrfs_err(root->fs_info, + "send: path ref buffer underflow for key (%llu %u %llu)", + found_key->objectid, + found_key->type, + found_key->offset); + ret = -EINVAL; + goto out; + } } p->start = start; } else { @@ -7604,8 +7612,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) sctx->waiting_dir_moves = RB_ROOT; sctx->orphan_dirs = RB_ROOT; - sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), - arg->clone_sources_count + 1, + sctx->clone_roots = kvcalloc(arg->clone_sources_count + 1, + sizeof(*sctx->clone_roots), GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 99cdd1d6a4..a9b794c471 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1424,6 +1424,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + btrfs_qgroup_free_meta_all_pertrans(root); spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log(trans, root); @@ -1448,7 +1449,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) if (ret2) return ret2; spin_lock(&fs_info->fs_roots_radix_lock); - btrfs_qgroup_free_meta_all_pertrans(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc18ba50a6..b5e2daf538 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1260,25 +1260,32 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; + int ret = 0; flags |= FMODE_EXCL; list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { - int ret; + int ret2; - ret = btrfs_open_one_device(fs_devices, device, flags, holder); - if (ret == 0 && + ret2 = btrfs_open_one_device(fs_devices, device, flags, holder); + if (ret2 
== 0 && (!latest_dev || device->generation > latest_dev->generation)) { latest_dev = device; - } else if (ret == -ENODATA) { + } else if (ret2 == -ENODATA) { fs_devices->num_devices--; list_del(&device->dev_list); btrfs_free_device(device); } + if (ret == 0 && ret2 != 0) + ret = ret2; } - if (fs_devices->open_devices == 0) + + if (fs_devices->open_devices == 0) { + if (ret) + return ret; return -EINVAL; + } fs_devices->opened = 1; fs_devices->latest_dev = latest_dev; @@ -1473,7 +1480,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, if (in_range(physical_start, *start, len) || in_range(*start, physical_start, - physical_end - physical_start)) { + physical_end + 1 - physical_start)) { *start = physical_end + 1; return true; } @@ -3358,7 +3365,18 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info) mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } - BUG_ON(ret == 0); /* Corruption */ + if (ret == 0) { + /* + * On the first search we would find chunk tree with + * offset -1, which is not possible. On subsequent + * loops this would find an existing item on an invalid + * offset (one less than the previous one, wrong + * alignment and size). + */ + ret = -EUCLEAN; + mutex_unlock(&fs_info->reclaim_bgs_lock); + goto error; + } ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 9bbead15a0..d3adcb9e70 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -374,6 +374,7 @@ cifs_alloc_inode(struct super_block *sb) * server, can not assume caching of file data or metadata. 
*/ cifs_set_oplock_level(cifs_inode, 0); + cifs_inode->lease_granted = false; cifs_inode->flags = 0; spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a3e4811b78..1cbfb74c53 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1145,7 +1145,7 @@ cifs_demultiplex_thread(void *p) } memalloc_noreclaim_restore(noreclaim_flag); - module_put_and_exit(0); + module_put_and_kthread_exit(0); } /* diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 6347e759b5..fb3651513f 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -672,6 +672,16 @@ static int smb3_fs_context_validate(struct fs_context *fc) /* set the port that we got earlier */ cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port); + if (ctx->uid_specified && !ctx->forceuid_specified) { + ctx->override_uid = 1; + pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n"); + } + + if (ctx->gid_specified && !ctx->forcegid_specified) { + ctx->override_gid = 1; + pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n"); + } + if (ctx->override_uid && !ctx->uid_specified) { ctx->override_uid = 0; pr_notice("ignoring forceuid mount option specified with no uid= option\n"); @@ -903,12 +913,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->override_uid = 0; else ctx->override_uid = 1; + ctx->forceuid_specified = true; break; case Opt_forcegid: if (result.negated) ctx->override_gid = 0; else ctx->override_gid = 1; + ctx->forcegid_specified = true; break; case Opt_perm: if (result.negated) diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 3cf8d62351..74bb19ec7c 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -152,6 +152,8 @@ enum cifs_param { }; struct smb3_fs_context { + bool forceuid_specified; + bool forcegid_specified; bool uid_specified; bool cruid_specified; bool gid_specified; diff --git 
a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3fe41964c0..7f9f68c00e 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -300,9 +300,11 @@ write_tag_66_packet(char *signature, u8 cipher_code, * | Key Identifier Size | 1 or 2 bytes | * | Key Identifier | arbitrary | * | File Encryption Key Size | 1 or 2 bytes | + * | Cipher Code | 1 byte | * | File Encryption Key | arbitrary | + * | Checksum | 2 bytes | */ - data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); + data_len = (8 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); *packet = kmalloc(data_len, GFP_KERNEL); message = *packet; if (!message) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c254094c4..b60edddf17 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -832,6 +832,34 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep return res; } +/* + * The ffd.file pointer may be in the process of being torn down due to + * being closed, but we may not have finished eventpoll_release() yet. + * + * Normally, even with the atomic_long_inc_not_zero, the file may have + * been free'd and then gotten re-allocated to something else (since + * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). + * + * But for epoll, users hold the ep->mtx mutex, and as such any file in + * the process of being free'd will block in eventpoll_release_file() + * and thus the underlying file allocation will not be free'd, and the + * file re-use cannot happen. + * + * For the same reason we can avoid a rcu_read_lock() around the + * operation - 'ffd.file' cannot go away even if the refcount has + * reached zero (but we must still not call out to ->poll() functions + * etc). 
+ */ +static struct file *epi_fget(const struct epitem *epi) +{ + struct file *file; + + file = epi->ffd.file; + if (!atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + return file; +} + /* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() @@ -840,14 +868,22 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { - struct file *file = epi->ffd.file; + struct file *file = epi_fget(epi); __poll_t res; + /* + * We could return EPOLLERR | EPOLLHUP or something, but let's + * treat this more as "file doesn't exist, poll didn't happen". + */ + if (!file) + return 0; + pt->_key = epi->event.events; if (!is_file_epoll(file)) res = vfs_poll(file, pt); else res = __ep_eventpoll_poll(file, pt, depth); + fput(file); return res & epi->event.events; } diff --git a/fs/exec.c b/fs/exec.c index 719115f232..03516b704d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -888,6 +888,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm, goto out; } + bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 3ef80d000e..584bdd912c 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -18,7 +18,7 @@ #include #include -#define dprintk(fmt, args...) do{}while(0) +#define dprintk(fmt, args...) 
pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); @@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + dprintk("get_parent of %lu failed, err %ld\n", + dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } @@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { - dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a254c2ba03..1145664a0b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2950,7 +2950,10 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); - seq_puts(seq, " ]\n"); + seq_puts(seq, " ]"); + if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) + seq_puts(seq, " Block bitmap corrupted!"); + seq_puts(seq, "\n"); return 0; } @@ -4696,10 +4699,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) .fe_len = ac->ac_g_ex.fe_len, }; loff_t orig_goal_end = extent_logical_end(sbi, &ex); + loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex); - /* we can't allocate as much as normalizer wants. 
- * so, found space must get proper lstart - * to cover original request */ + /* + * We can't allocate as much as normalizer wants, so we try + * to get proper lstart to cover the original request, except + * when the goal doesn't cover the original request as below: + * + * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048 + * best_ex:0/200(200) -> adjusted: 1848/2048(200) + */ BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); @@ -4711,7 +4720,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) * 1. Check if best ex can be kept at end of goal and still * cover original start * 2. Else, check if best ex can be kept at start of goal and - * still cover original start + * still cover original end * 3. Else, keep the best ex at start of original request. */ ex.fe_len = ac->ac_b_ex.fe_len; @@ -4721,7 +4730,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) goto adjust_bex; ex.fe_logical = ac->ac_g_ex.fe_logical; - if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex)) + if (o_ex_end <= extent_logical_end(sbi, &ex)) goto adjust_bex; ex.fe_logical = ac->ac_o_ex.fe_logical; @@ -4729,7 +4738,6 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) ac->ac_b_ex.fe_logical = ex.fe_logical; BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); - BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end); } @@ -5612,8 +5620,73 @@ static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb, return ret; } -static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, - struct ext4_allocation_request *ar, int *errp); +/* + * Simple allocator for Ext4 fast commit replay path. It searches for blocks + * linearly starting at the goal block and also excludes the blocks which + * are going to be in use after fast commit replay. 
+ */ +static ext4_fsblk_t +ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp) +{ + struct buffer_head *bitmap_bh; + struct super_block *sb = ar->inode->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_group_t group, nr; + ext4_grpblk_t blkoff; + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; + ext4_fsblk_t goal, block; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + goal = ar->goal; + if (goal < le32_to_cpu(es->s_first_data_block) || + goal >= ext4_blocks_count(es)) + goal = le32_to_cpu(es->s_first_data_block); + + ar->len = 0; + ext4_get_group_no_and_offset(sb, goal, &group, &blkoff); + for (nr = ext4_get_groups_count(sb); nr > 0; nr--) { + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) { + *errp = PTR_ERR(bitmap_bh); + pr_warn("Failed to read block bitmap\n"); + return 0; + } + + while (1) { + i = mb_find_next_zero_bit(bitmap_bh->b_data, max, + blkoff); + if (i >= max) + break; + if (ext4_fc_replay_check_excluded(sb, + ext4_group_first_block_no(sb, group) + + EXT4_C2B(sbi, i))) { + blkoff = i + 1; + } else + break; + } + brelse(bitmap_bh); + if (i < max) + break; + + if (++group >= ext4_get_groups_count(sb)) + group = 0; + + blkoff = 0; + } + + if (i >= max) { + *errp = -ENOSPC; + return 0; + } + + block = ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, i); + ext4_mb_mark_bb(sb, block, 1, 1); + ar->len = 1; + + *errp = 0; + return block; +} /* * Main entry point into mballoc to allocate blocks @@ -5638,7 +5711,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, trace_ext4_request_blocks(ar); if (sbi->s_mount_state & EXT4_FC_REPLAY) - return ext4_mb_new_blocks_simple(handle, ar, errp); + return ext4_mb_new_blocks_simple(ar, errp); /* Allow to use superuser reservation for quota file */ if (ext4_is_quota_file(ar->inode)) @@ -5868,69 +5941,6 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, return 0; } -/* - * Simple allocator for Ext4 fast commit replay 
path. It searches for blocks - * linearly starting at the goal block and also excludes the blocks which - * are going to be in use after fast commit replay. - */ -static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, - struct ext4_allocation_request *ar, int *errp) -{ - struct buffer_head *bitmap_bh; - struct super_block *sb = ar->inode->i_sb; - ext4_group_t group; - ext4_grpblk_t blkoff; - ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); - ext4_grpblk_t i = 0; - ext4_fsblk_t goal, block; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - - goal = ar->goal; - if (goal < le32_to_cpu(es->s_first_data_block) || - goal >= ext4_blocks_count(es)) - goal = le32_to_cpu(es->s_first_data_block); - - ar->len = 0; - ext4_get_group_no_and_offset(sb, goal, &group, &blkoff); - for (; group < ext4_get_groups_count(sb); group++) { - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - *errp = PTR_ERR(bitmap_bh); - pr_warn("Failed to read block bitmap\n"); - return 0; - } - - ext4_get_group_no_and_offset(sb, - max(ext4_group_first_block_no(sb, group), goal), - NULL, &blkoff); - while (1) { - i = mb_find_next_zero_bit(bitmap_bh->b_data, max, - blkoff); - if (i >= max) - break; - if (ext4_fc_replay_check_excluded(sb, - ext4_group_first_block_no(sb, group) + i)) { - blkoff = i + 1; - } else - break; - } - brelse(bitmap_bh); - if (i < max) - break; - } - - if (group >= ext4_get_groups_count(sb) || i >= max) { - *errp = -ENOSPC; - return 0; - } - - block = ext4_group_first_block_no(sb, group) + i; - ext4_mb_mark_bb(sb, block, 1, 1); - ar->len = 1; - - return block; -} - static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, unsigned long count) { diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index bf048cbf39..3f40746c90 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -181,8 +181,8 @@ struct ext4_allocation_context { ext4_group_t ac_last_optimal_group; __u32 ac_groups_considered; __u32 ac_flags; /* allocation hints */ 
+ __u32 ac_groups_linear_remaining; __u16 ac_groups_scanned; - __u16 ac_groups_linear_remaining; __u16 ac_found; __u16 ac_tail; __u16 ac_buddy; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 14c977e1e4..e9501fb284 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2901,7 +2901,7 @@ static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, NULL, 0, NULL, EXT4_HT_DIR, - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) + 4 + EXT4_XATTR_TRANS_BLOCKS); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 70d4f6e3aa..4687d59720 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1559,7 +1559,8 @@ static int ext4_flex_group_add(struct super_block *sb, int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); - int meta_bg = ext4_has_feature_meta_bg(sb); + int meta_bg = ext4_has_feature_meta_bg(sb) && + gdb_num >= le32_to_cpu(es->s_first_meta_bg); sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - ext4_group_first_block_no(sb, 0); sector_t old_gdb = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 65716a1705..b09b7a6b7a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "ext4.h" #include "ext4_extents.h" /* Needed for trace points definition */ @@ -753,6 +754,8 @@ void __ext4_error(struct super_block *sb, const char *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); } + fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED); + ext4_handle_error(sb, force_ro, error, 0, block, function, line); } @@ -783,6 +786,8 @@ void __ext4_error_inode(struct inode *inode, const char *function, current->comm, &vaf); va_end(args); } + fsnotify_sb_error(inode->i_sb, inode, error ? 
error : EFSCORRUPTED); + ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block, function, line); } @@ -821,6 +826,8 @@ void __ext4_error_file(struct file *file, const char *function, current->comm, path, &vaf); va_end(args); } + fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED); + ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block, function, line); } @@ -888,6 +895,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", sb->s_id, function, line, errstr); } + fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED); ext4_handle_error(sb, false, -errno, 0, 0, function, line); } @@ -5843,7 +5851,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) - ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); + ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? 
SB_POSIXACL : 0); @@ -6162,6 +6170,10 @@ static int ext4_write_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to commit dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6178,6 +6190,10 @@ static int ext4_acquire_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to acquire dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6197,6 +6213,10 @@ static int ext4_release_dquot(struct dquot *dquot) return PTR_ERR(handle); } ret = dquot_release(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to release dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f79705af3a..37f3c2ebe6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -3075,8 +3075,10 @@ ext4_xattr_block_cache_find(struct inode *inode, bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO); if (IS_ERR(bh)) { - if (PTR_ERR(bh) == -ENOMEM) + if (PTR_ERR(bh) == -ENOMEM) { + mb_cache_entry_put(ea_block_cache, ce); return NULL; + } bh = NULL; EXT4_ERROR_INODE(inode, "block %lu read error", (unsigned long)ce->e_value); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7d3e7418d8..71a3714419 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -792,7 +792,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) */ head = &im->ino_list; - /* loop for each orphan inode entry and write them in Jornal block */ + /* loop for each orphan inode entry and write them in journal block */ list_for_each_entry(orphan, head, list) { if (!page) { page = f2fs_grab_meta_page(sbi, start_blk++); @@ -1124,7 +1124,7 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info 
*sbi, enum inode_type type, } else { /* * We should submit bio, since it exists several - * wribacking dentry pages in the freeing inode. + * writebacking dentry pages in the freeing inode. */ f2fs_submit_merged_write(sbi, DATA); cond_resched(); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 88799c6ebd..be46dc4152 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1247,7 +1247,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - /* we should bypass data pages to proceed the kworkder jobs */ + /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { mapping_set_error(cc->rpages[0]->mapping, -EIO); goto out_free; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a8e99da8ed..fa86eaf1d6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2412,7 +2412,7 @@ static int f2fs_mpage_readpages(struct inode *inode, #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { - /* there are remained comressed pages, submit them */ + /* there are remained compressed pages, submit them */ if (!f2fs_cluster_can_merge_page(&cc, page->index)) { ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, @@ -2811,7 +2811,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, trace_f2fs_writepage(page, DATA); - /* we should bypass data pages to proceed the kworkder jobs */ + /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { mapping_set_error(page->mapping, -EIO); /* @@ -2938,7 +2938,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, redirty_out: redirty_page_for_writepage(wbc, page); /* - * pageout() in MM traslates EAGAIN, so calls handle_write_error() + * pageout() in MM translates EAGAIN, so calls handle_write_error() * -> mapping_set_error() -> set_bit(AS_EIO, ...). 
* file_write_and_wait_range() will see EIO error, which is critical * to return value of fsync() followed by atomic_write failure to user. @@ -2972,7 +2972,7 @@ static int f2fs_write_data_page(struct page *page, } /* - * This function was copied from write_cche_pages from mm/page-writeback.c. + * This function was copied from write_cache_pages from mm/page-writeback.c. * The major change is making write step of cold data page separately from * warm/hot data page. */ diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 6a9ab5c119..30b8924d14 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -112,7 +112,7 @@ struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, * @prev_ex: extent before ofs * @next_ex: extent after ofs * @insert_p: insert point for new extent at ofs - * in order to simpfy the insertion after. + * in order to simplify the insertion after. * tree must stay unchanged between lookup and insertion. */ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, @@ -573,7 +573,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (!en) en = next_en; - /* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */ + /* 2. 
invalidate all extent nodes in range [fofs, fofs + len - 1] */ while (en && en->ei.fofs < end) { unsigned int org_end; int parts = 0; /* # of parts current extent split into */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 647d53df6a..e49fca9daf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2587,16 +2587,6 @@ static inline struct page *f2fs_pagecache_get_page( return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); } -static inline void f2fs_copy_page(struct page *src, struct page *dst) -{ - char *src_kaddr = kmap(src); - char *dst_kaddr = kmap(dst); - - memcpy(dst_kaddr, src_kaddr, PAGE_SIZE); - kunmap(dst); - kunmap(src); -} - static inline void f2fs_put_page(struct page *page, int unlock) { if (!page) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 378ab6bd1b..be9536815e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -304,7 +304,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, * for OPU case, during fsync(), node can be persisted before * data when lower device doesn't support write barrier, result * in data corruption after SPO. - * So for strict fsync mode, force to use atomic write sematics + * So for strict fsync mode, force to use atomic write semantics * to keep write order in between data/node and last node to * avoid potential data corruption. 
*/ @@ -899,9 +899,14 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ATTR_GID | ATTR_TIMES_SET)))) return -EPERM; - if ((attr->ia_valid & ATTR_SIZE) && - !f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; + if ((attr->ia_valid & ATTR_SIZE)) { + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(F2FS_I(inode)->i_cluster_size))) + return -EINVAL; + } err = setattr_prepare(&init_user_ns, dentry, attr); if (err) @@ -1276,7 +1281,10 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(psrc, 1); return PTR_ERR(pdst); } - f2fs_copy_page(psrc, pdst); + + f2fs_wait_on_page_writeback(pdst, DATA, true, true); + + memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); set_page_private_gcing(pdst); f2fs_put_page(pdst, 1); @@ -1757,11 +1765,6 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; - if (f2fs_compressed_file(inode) && - (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | - FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) - return -EOPNOTSUPP; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1769,6 +1772,17 @@ static long f2fs_fallocate(struct file *file, int mode, inode_lock(inode); + /* + * Pinned file should not support partial truncation since the block + * can be used by applications. 
+ */ + if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { + ret = -EOPNOTSUPP; + goto out; + } + ret = file_modified(file); if (ret) goto out; @@ -1804,7 +1818,7 @@ static long f2fs_fallocate(struct file *file, int mode, static int f2fs_release_file(struct inode *inode, struct file *filp) { /* - * f2fs_relase_file is called at every close calls. So we should + * f2fs_release_file is called at every close calls. So we should * not drop any inmemory pages by close called by other process. */ if (!(filp->f_mode & FMODE_WRITE) || @@ -2781,7 +2795,8 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out; } - if (f2fs_compressed_file(src) || f2fs_compressed_file(dst)) { + if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || + f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { ret = -EOPNOTSUPP; goto out_unlock; } @@ -3433,12 +3448,9 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) int ret; int writecount; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3446,7 +3458,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3457,7 +3469,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -3482,9 +3495,12 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + 
set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3502,6 +3518,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -3612,12 +3630,9 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) unsigned int reserved_blocks = 0; int ret; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3628,11 +3643,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) goto out; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); - if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto unlock_inode; } @@ -3646,9 +3662,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3666,6 +3685,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -4017,10 +4038,7 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); 
file_start_write(filp); inode_lock(inode); @@ -4030,7 +4048,8 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4089,10 +4108,7 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); @@ -4102,7 +4118,8 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 94e21136d5..ddb297409f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -330,6 +330,12 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } + if (fi->i_xattr_nid && f2fs_check_nid_range(sbi, fi->i_xattr_nid)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_xattr_nid: %u, run fsck to fix.", + __func__, inode->i_ino, fi->i_xattr_nid); + return false; + } + return true; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d6906644f..80bc386ec6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -959,7 +959,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, /* * If new_inode is null, the below renaming flow will - * add a link in old_dir which can conver inline_dir. + * add a link in old_dir which can convert inline_dir. * After then, if we failed to get the entry due to other * reasons like ENOMEM, we had to remove the new entry. 
* Instead of adding such the error handling routine, let's diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dc85dd5531..b675888754 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1300,6 +1300,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) } if (unlikely(new_ni.blk_addr != NULL_ADDR)) { err = -EFSCORRUPTED; + dec_valid_node_count(sbi, dn->inode, !ofs); set_sbi_flag(sbi, SBI_NEED_FSCK); goto fail; } @@ -1325,7 +1326,6 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) if (ofs == 0) inc_valid_inode_count(sbi); return page; - fail: clear_node_page_dirty(page); f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b454496ca6..1c69dc91c3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3772,7 +3772,7 @@ void f2fs_wait_on_page_writeback(struct page *page, /* submit cached LFS IO */ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); - /* sbumit cached IPU IO */ + /* submit cached IPU IO */ f2fs_submit_merged_ipu_write(sbi, NULL, page); if (ordered) { wait_on_page_writeback(page); diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index af191371c3..bab63eeaf9 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -130,6 +130,12 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp, fid->parent_i_gen = parent->i_generation; type = FILEID_FAT_WITH_PARENT; *lenp = FAT_FID_SIZE_WITH_PARENT; + } else { + /* + * We need to initialize this field because the fh is actually + * 12 bytes long + */ + fid->parent_i_pos_hi = 0; } return type; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4ea52906ae..44d1c8cc58 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -390,6 +390,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name goto out_put_forget; if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; + if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) { + pr_warn_once("root generation should be zero\n"); + outarg->generation = 0; 
+ } *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, &outarg->attr, entry_attr_timeout(outarg), diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4b8f094345..55b7ca26fb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -899,7 +899,6 @@ static inline bool fuse_stale_inode(const struct inode *inode, int generation, static inline void fuse_make_bad(struct inode *inode) { - remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 97dc24557b..62b4143ccf 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -413,8 +413,11 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); - iput(inode); - goto retry; + if (inode != d_inode(sb->s_root)) { + remove_inode_hash(inode); + iput(inode); + goto retry; + } } fi = get_fuse_inode(inode); spin_lock(&fi->lock); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 0ec1eaf338..d2011c3c33 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1704,7 +1704,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; - u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; + unsigned int bsize = 1 << bsize_shift; + u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; @@ -1715,7 +1716,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) u64 prev_bnr = 0; __be64 *start, *end; - if (offset >= maxsize) { + if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated meta-data; * there are no blocks do deallocate. 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e85ef6b147..7fed3beb5e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -807,11 +807,13 @@ __acquires(&gl->gl_lockref.lock) } if (sdp->sd_lockstruct.ls_ops->lm_lock) { + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && target == LM_ST_UNLOCKED && - test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { + test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { finish_xmote(gl, target); gfs2_glock_queue_work(gl, 0); } else if (ret) { diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 558932ad89..5a4b3550d8 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -82,6 +82,9 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); + + if (gfs2_withdrawing(sdp)) + gfs2_withdraw(sdp); } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index cf345a86ef..9cdece4928 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -351,7 +351,6 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) fs_err(sdp, "telling LM to unmount\n"); lm->lm_unmount(sdp); } - set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); fs_err(sdp, "File system withdrawn\n"); dump_stack(); clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 0c6eacfcbe..07252d2a7f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -908,8 +908,22 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) * we then decide whether to use the Joliet descriptor. */ inode = isofs_iget(s, sbi->s_firstdatazone, 0); - if (IS_ERR(inode)) - goto out_no_root; + + /* + * Fix for broken CDs with a corrupt root inode but a correct Joliet + * root directory. 
+ */ + if (IS_ERR(inode)) { + if (joliet_level && sbi->s_firstdatazone != first_data_zone) { + printk(KERN_NOTICE + "ISOFS: root inode is unusable. " + "Disabling Rock Ridge and switching to Joliet."); + sbi->s_rock = 0; + inode = NULL; + } else { + goto out_no_root; + } + } /* * Fix for broken CDs with Rock Ridge and empty ISO root directory but diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index acb4492f59..5a31220f96 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1111,6 +1111,9 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, return rc; request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size); + if (request > c->sector_size - c->cleanmarker_size) + return -ERANGE; + rc = jffs2_reserve_space(c, request, &length, ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE); if (rc) { diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index ecffcb8a15..dc30cd0f6a 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -166,7 +166,8 @@ struct ksmbd_share_config_response { __u16 force_uid; __u16 force_gid; __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; - __u32 reserved[112]; /* Reserved room */ + __u32 reserved[111]; /* Reserved room */ + __u32 payload_sz; __u32 veto_list_sz; __s8 ____payload[]; }; diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/ksmbd/mgmt/share_config.c index 328a412259..a2f0a2edce 100644 --- a/fs/ksmbd/mgmt/share_config.c +++ b/fs/ksmbd/mgmt/share_config.c @@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, share->name = kstrdup(name, GFP_KERNEL); if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { - share->path = kstrdup(ksmbd_share_config_path(resp), + int path_len = PATH_MAX; + + if (resp->payload_sz) + path_len = resp->payload_sz - resp->veto_list_sz; + + share->path = kstrndup(ksmbd_share_config_path(resp), path_len, GFP_KERNEL); if (share->path) share->path_sz = strlen(share->path); diff --git a/fs/ksmbd/server.c 
b/fs/ksmbd/server.c index 11b201e6ee..63b01f7d97 100644 --- a/fs/ksmbd/server.c +++ b/fs/ksmbd/server.c @@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, int rc; bool is_chained = false; - if (conn->ops->allocate_rsp_buf(work)) - return; - if (conn->ops->is_transform_hdr && conn->ops->is_transform_hdr(work->request_buf)) { rc = conn->ops->decrypt_req(work); - if (rc < 0) { - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); - goto send; - } - + if (rc < 0) + return; work->encrypted = true; } + if (conn->ops->allocate_rsp_buf(work)) + return; + rc = conn->ops->init_rsp_hdr(work); if (rc) { /* either uid or tid is not correct */ diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index c69943d965..d0db9f32c4 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -229,6 +229,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; } @@ -276,11 +281,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_DIRECTORY_LEASING; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || - (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && - conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 0613b8d144..57f59172d8 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -536,6 
+536,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) if (cmd == SMB2_QUERY_INFO_HE) { struct smb2_query_info_req *req; + if (get_rfc1002_len(work->request_buf) < + offsetof(struct smb2_query_info_req, OutputBufferLength)) + return -EINVAL; + req = smb2_get_msg(work->request_buf); if ((req->InfoType == SMB2_O_INFO_FILE && (req->FileInfoClass == FILE_FULL_EA_INFORMATION || @@ -5581,8 +5585,9 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; - smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); + if (!rc) + smb_break_all_levII_oplock(work, fp, 0); out: kfree(new_name); return rc; @@ -5759,15 +5764,21 @@ static int set_file_allocation_info(struct ksmbd_work *work, loff_t alloc_blks; struct inode *inode; + struct kstat stat; int rc; if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); + if (rc) + return rc; + alloc_blks = (le64_to_cpu(file_alloc_info->AllocationSize) + 511) >> 9; inode = file_inode(fp->filp); - if (alloc_blks > inode->i_blocks) { + if (alloc_blks > stat.blocks) { smb_break_all_levII_oplock(work, fp, 1); rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0, alloc_blks * 512); @@ -5775,7 +5786,7 @@ static int set_file_allocation_info(struct ksmbd_work *work, pr_err("vfs_fallocate is failed : %d\n", rc); return rc; } - } else if (alloc_blks < inode->i_blocks) { + } else if (alloc_blks < stat.blocks) { loff_t size; /* diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index 2c9662e327..d62ebbff1e 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -65,6 +65,7 @@ struct ipc_msg_table_entry { struct hlist_node ipc_table_hlist; void *response; + unsigned int msg_sz; }; static struct delayed_work ipc_timer_work; @@ -274,6 +275,7 @@ static int handle_response(int type, void *payload, size_t sz) } 
memcpy(entry->response, payload, sz); + entry->msg_sz = sz; wake_up_interruptible(&entry->wait); ret = 0; break; @@ -452,6 +454,34 @@ static int ipc_msg_send(struct ksmbd_ipc_msg *msg) return ret; } +static int ipc_validate_msg(struct ipc_msg_table_entry *entry) +{ + unsigned int msg_sz = entry->msg_sz; + + if (entry->type == KSMBD_EVENT_RPC_REQUEST) { + struct ksmbd_rpc_command *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; + } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { + struct ksmbd_spnego_authen_response *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_spnego_authen_response) + + resp->session_key_len + resp->spnego_blob_len; + } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { + struct ksmbd_share_config_response *resp = entry->response; + + if (resp->payload_sz) { + if (resp->payload_sz < resp->veto_list_sz) + return -EINVAL; + + msg_sz = sizeof(struct ksmbd_share_config_response) + + resp->payload_sz; + } + } + + return entry->msg_sz != msg_sz ? 
-EINVAL : 0; +} + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) { struct ipc_msg_table_entry entry; @@ -476,6 +506,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle ret = wait_event_interruptible_timeout(entry.wait, entry.response != NULL, IPC_WAIT_TIMEOUT); + if (entry.response) { + ret = ipc_validate_msg(&entry); + if (ret) { + kvfree(entry.response); + entry.response = NULL; + } + } out: down_write(&ipc_msg_table_lock); hash_del(&entry.ipc_table_hlist); diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 173a488bfe..7afb2412c4 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -745,10 +745,15 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto out4; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { err = -EEXIST; goto out4; } + flags &= ~(RENAME_NOREPLACE); if (old_child == trap) { err = -EINVAL; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f802223e71..cdc8e12cda 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -164,7 +164,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; host->h_cred = get_cred(ni->cred); - strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); + strscpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: return host; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b632be3ad5..5579e67da1 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -54,13 +54,9 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; -static struct task_struct *nlmsvc_task; -static struct svc_rqst *nlmsvc_rqst; +static struct svc_serv *nlmsvc_serv; unsigned long nlmsvc_timeout; -static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); -static 
DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); - unsigned int lockd_net_id; /* @@ -184,6 +180,10 @@ lockd(void *vrqstp) nlm_shutdown_hosts(); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); + + dprintk("lockd_down: service stopped\n"); + + svc_exit_thread(rqstp); return 0; } @@ -196,8 +196,8 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, net, family, port, - SVC_SOCK_DEFAULTS, cred); + return svc_xprt_create(serv, name, net, family, port, + SVC_SOCK_DEFAULTS, cred); svc_xprt_put(xprt); return 0; } @@ -247,7 +247,8 @@ static int make_socks(struct svc_serv *serv, struct net *net, if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); - svc_shutdown_net(serv, net); + svc_xprt_destroy_all(serv, net); + svc_rpcb_cleanup(serv, net); return err; } @@ -285,13 +286,12 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) nlm_shutdown_hosts_net(net); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); - svc_shutdown_net(serv, net); - dprintk("%s: per-net data destroyed; net=%x\n", - __func__, net->ns.inum); + svc_xprt_destroy_all(serv, net); + svc_rpcb_cleanup(serv, net); } } else { - pr_err("%s: no users! task=%p, net=%x\n", - __func__, nlmsvc_task, net->ns.inum); + pr_err("%s: no users! 
net=%x\n", + __func__, net->ns.inum); BUG(); } } @@ -302,20 +302,16 @@ static int lockd_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nlm_ntf_refcnt)) + if (event != NETDEV_DOWN) goto out; - if (nlmsvc_rqst) { + if (nlmsvc_serv) { dprintk("lockd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; - svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, - (struct sockaddr *)&sin); + svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin); } - atomic_dec(&nlm_ntf_refcnt); - wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -332,21 +328,17 @@ static int lockd_inet6addr_event(struct notifier_block *this, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nlm_ntf_refcnt)) + if (event != NETDEV_DOWN) goto out; - if (nlmsvc_rqst) { + if (nlmsvc_serv) { dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6.sin6_scope_id = ifa->idev->dev->ifindex; - svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, - (struct sockaddr *)&sin6); + svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6); } - atomic_dec(&nlm_ntf_refcnt); - wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -357,86 +349,14 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static void lockd_unregister_notifiers(void) -{ - unregister_inetaddr_notifier(&lockd_inetaddr_notifier); -#if IS_ENABLED(CONFIG_IPV6) - unregister_inet6addr_notifier(&lockd_inet6addr_notifier); -#endif - wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0); -} - -static void lockd_svc_exit_thread(void) -{ - atomic_dec(&nlm_ntf_refcnt); - lockd_unregister_notifiers(); - svc_exit_thread(nlmsvc_rqst); -} - -static int 
lockd_start_svc(struct svc_serv *serv) +static int lockd_get(void) { + struct svc_serv *serv; int error; - if (nlmsvc_rqst) + if (nlmsvc_serv) { + nlmsvc_users++; return 0; - - /* - * Create the kernel thread and wait for it to start. - */ - nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - if (IS_ERR(nlmsvc_rqst)) { - error = PTR_ERR(nlmsvc_rqst); - printk(KERN_WARNING - "lockd_up: svc_rqst allocation failed, error=%d\n", - error); - lockd_unregister_notifiers(); - goto out_rqst; - } - - atomic_inc(&nlm_ntf_refcnt); - svc_sock_update_bufs(serv); - serv->sv_maxconn = nlm_max_connections; - - nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name); - if (IS_ERR(nlmsvc_task)) { - error = PTR_ERR(nlmsvc_task); - printk(KERN_WARNING - "lockd_up: kthread_run failed, error=%d\n", error); - goto out_task; - } - nlmsvc_rqst->rq_task = nlmsvc_task; - wake_up_process(nlmsvc_task); - - dprintk("lockd_up: service started\n"); - return 0; - -out_task: - lockd_svc_exit_thread(); - nlmsvc_task = NULL; -out_rqst: - nlmsvc_rqst = NULL; - return error; -} - -static const struct svc_serv_ops lockd_sv_ops = { - .svo_shutdown = svc_rpcb_cleanup, - .svo_enqueue_xprt = svc_xprt_do_enqueue, -}; - -static struct svc_serv *lockd_create_svc(void) -{ - struct svc_serv *serv; - - /* - * Check whether we're already up and running. - */ - if (nlmsvc_rqst) { - /* - * Note: increase service usage, because later in case of error - * svc_destroy() will be called. 
- */ - svc_get(nlmsvc_rqst->rq_server); - return nlmsvc_rqst->rq_server; } /* @@ -451,17 +371,44 @@ static struct svc_serv *lockd_create_svc(void) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } + + serv->sv_maxconn = nlm_max_connections; + error = svc_set_num_threads(serv, NULL, 1); + /* The thread now holds the only reference */ + svc_put(serv); + if (error < 0) + return error; + + nlmsvc_serv = serv; register_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&lockd_inet6addr_notifier); #endif dprintk("lockd_up: service created\n"); - return serv; + nlmsvc_users++; + return 0; +} + +static void lockd_put(void) +{ + if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n")) + return; + if (--nlmsvc_users) + return; + + unregister_inetaddr_notifier(&lockd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&lockd_inet6addr_notifier); +#endif + + svc_set_num_threads(nlmsvc_serv, NULL, 0); + nlmsvc_serv = NULL; + dprintk("lockd_down: service destroyed\n"); } /* @@ -469,36 +416,21 @@ static struct svc_serv *lockd_create_svc(void) */ int lockd_up(struct net *net, const struct cred *cred) { - struct svc_serv *serv; int error; mutex_lock(&nlmsvc_mutex); - serv = lockd_create_svc(); - if (IS_ERR(serv)) { - error = PTR_ERR(serv); - goto err_create; - } + error = lockd_get(); + if (error) + goto err; - error = lockd_up_net(serv, net, cred); + error = lockd_up_net(nlmsvc_serv, net, cred); if (error < 0) { - lockd_unregister_notifiers(); - goto err_put; + lockd_put(); + goto err; } - error = lockd_start_svc(serv); - if (error < 0) { - lockd_down_net(serv, net); - goto err_put; - } - nlmsvc_users++; - /* - * Note: svc_serv structures have an 
initial use count of 1, - * so we exit through here on both success and failure. - */ -err_put: - svc_destroy(serv); -err_create: +err: mutex_unlock(&nlmsvc_mutex); return error; } @@ -511,27 +443,8 @@ void lockd_down(struct net *net) { mutex_lock(&nlmsvc_mutex); - lockd_down_net(nlmsvc_rqst->rq_server, net); - if (nlmsvc_users) { - if (--nlmsvc_users) - goto out; - } else { - printk(KERN_ERR "lockd_down: no users! task=%p\n", - nlmsvc_task); - BUG(); - } - - if (!nlmsvc_task) { - printk(KERN_ERR "lockd_down: no lockd running.\n"); - BUG(); - } - kthread_stop(nlmsvc_task); - dprintk("lockd_down: service stopped\n"); - lockd_svc_exit_thread(); - dprintk("lockd_down: service destroyed\n"); - nlmsvc_task = NULL; - nlmsvc_rqst = NULL; -out: + lockd_down_net(nlmsvc_serv, net); + lockd_put(); mutex_unlock(&nlmsvc_mutex); } EXPORT_SYMBOL_GPL(lockd_down); @@ -780,11 +693,9 @@ module_exit(exit_nlm); static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *procp = rqstp->rq_procinfo; - struct kvec *argv = rqstp->rq_arg.head; - struct kvec *resv = rqstp->rq_res.head; svcxdr_init_decode(rqstp); - if (!procp->pc_decode(rqstp, argv->iov_base)) + if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; *statp = procp->pc_func(rqstp); @@ -794,7 +705,7 @@ static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) return 1; svcxdr_init_encode(rqstp); - if (!procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) + if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; return 1; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1c9214801e..e318d55e4c 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, *filp = file; /* Set up the missing parts of the file_lock structure */ + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_start = 
(loff_t)lock->lock_start; @@ -95,6 +96,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; + struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST4 called\n"); @@ -104,6 +106,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + test_owner = argp->lock.fl.fl_owner; /* Now check for conflicting locks */ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie); if (resp->status == nlm_drop_reply) @@ -111,7 +114,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); - nlmsvc_release_lockowner(&argp->lock); + nlmsvc_put_lockowner(test_owner); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -521,6 +524,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "NULL", @@ -530,6 +534,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_testres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+2+No+Rg, .pc_name = "TEST", @@ -539,6 +544,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "LOCK", @@ -548,6 +554,7 @@ const struct svc_procedure 
nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_cancargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "CANCEL", @@ -557,6 +564,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_unlockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "UNLOCK", @@ -566,6 +574,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "GRANTED", @@ -575,6 +584,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_MSG", @@ -584,6 +594,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_MSG", @@ -593,6 +604,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_cancargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_MSG", @@ -602,6 +614,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_unlockargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + 
.pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_MSG", @@ -611,6 +624,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_MSG", @@ -620,6 +634,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_RES", @@ -629,6 +644,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_RES", @@ -638,6 +654,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_RES", @@ -647,6 +664,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_RES", @@ -656,6 +674,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_res, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_RES", @@ -665,6 +684,7 @@ const struct 
svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_reboot, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_reboot), + .pc_argzero = sizeof(struct nlm_reboot), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "SM_NOTIFY", @@ -674,6 +694,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "UNUSED", @@ -683,6 +704,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "UNUSED", @@ -692,6 +714,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "UNUSED", @@ -701,6 +724,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_shareargs, .pc_encode = nlm4svc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "SHARE", @@ -710,6 +734,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_shareargs, .pc_encode = nlm4svc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "UNSHARE", @@ -719,6 +744,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct 
nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "NM_LOCK", @@ -728,6 +754,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_notify, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "FREE_ALL", diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e9b85d8fd5..4e30f3c509 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -340,7 +340,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner) return lockowner; } -static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) +void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) { if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) return; @@ -470,8 +470,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie, int reclaim) { - struct nlm_block *block = NULL; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct inode *inode = nlmsvc_file_inode(file); +#endif + struct nlm_block *block = NULL; int error; int mode; int async_block = 0; @@ -484,7 +486,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_end, wait); - if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) { + if (nlmsvc_file_file(file)->f_op->lock) { async_block = wait; wait = 0; } @@ -588,7 +590,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, int error; int mode; __be32 ret; - struct nlm_lockowner *test_owner; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", nlmsvc_file_inode(file)->i_sb->s_id, @@ -602,9 +603,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - /* If there's a conflicting lock, remember to clean up the test lock */ - test_owner = (struct nlm_lockowner *)lock->fl.fl_owner; - 
mode = lock_to_openmode(&lock->fl); error = vfs_test_lock(file->f_file[mode], &lock->fl); if (error) { @@ -633,10 +631,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->fl.fl_end = lock->fl.fl_end; locks_release_private(&lock->fl); - /* Clean up the test lock */ - lock->fl.fl_owner = NULL; - nlmsvc_put_lockowner(test_owner); - ret = nlm_lck_denied; out: return ret; @@ -665,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) nlmsvc_cancel_blocked(net, file, lock); lock->fl.fl_type = F_UNLCK; - if (file->f_file[O_RDONLY]) - error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_RDONLY]; + if (lock->fl.fl_file) + error = vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); - if (file->f_file[O_WRONLY]) - error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_WRONLY]; + if (lock->fl.fl_file) + error |= vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); return (error < 0)? 
nlm_lck_denied_nolocks : nlm_granted; @@ -703,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l block = nlmsvc_lookup_block(file, lock); mutex_unlock(&file->f_mutex); if (block != NULL) { - mode = lock_to_openmode(&lock->fl); - vfs_cancel_lock(block->b_file->f_file[mode], - &block->b_call->a_args.lock.fl); + struct file_lock *fl = &block->b_call->a_args.lock.fl; + + mode = lock_to_openmode(fl); + vfs_cancel_lock(block->b_file->f_file[mode], fl); status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 99696d3f6d..2a615032f5 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ mode = lock_to_openmode(&lock->fl); + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; @@ -116,6 +117,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; + struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST called\n"); @@ -125,6 +127,8 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; + test_owner = argp->lock.fl.fl_owner; + /* Now check for conflicting locks */ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie)); if (resp->status == nlm_drop_reply) @@ -133,7 +137,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); - nlmsvc_release_lockowner(&argp->lock); + nlmsvc_put_lockowner(test_owner); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -554,6 +558,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "NULL", @@ -563,6 +568,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_testres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+2+No+Rg, .pc_name = "TEST", @@ -572,6 +578,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "LOCK", @@ -581,6 +588,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_cancargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "CANCEL", @@ -590,6 +598,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_unlockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), 
.pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "UNLOCK", @@ -599,6 +608,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "GRANTED", @@ -608,6 +618,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_MSG", @@ -617,6 +628,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_MSG", @@ -626,6 +638,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_cancargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_MSG", @@ -635,6 +648,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_unlockargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_MSG", @@ -644,6 +658,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_MSG", @@ -653,6 +668,7 @@ const struct svc_procedure nlmsvc_procedures[24] = 
{ .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_RES", @@ -662,6 +678,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_RES", @@ -671,6 +688,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_RES", @@ -680,6 +698,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_RES", @@ -689,6 +708,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_res, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_RES", @@ -698,6 +718,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_reboot, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_reboot), + .pc_argzero = sizeof(struct nlm_reboot), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "SM_NOTIFY", @@ -707,6 +728,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct 
nlm_void), .pc_xdrressize = St, .pc_name = "UNUSED", @@ -716,6 +738,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNUSED", @@ -725,6 +748,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNUSED", @@ -734,6 +758,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_shareargs, .pc_encode = nlmsvc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "SHARE", @@ -743,6 +768,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_shareargs, .pc_encode = nlmsvc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "UNSHARE", @@ -752,6 +778,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "NM_LOCK", @@ -761,6 +788,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_notify, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "FREE_ALL", diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 3515f17eaf..e3b6229e7a 100644 --- 
a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct nlm_host *lockhost; if (!flctx || list_empty_careful(&flctx->flc_posix)) @@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9235e60b17..2fb5748dae 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -145,137 +145,131 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) * Decode Call arguments */ -int -nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int -nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + 
return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; argp->monitor = 1; /* monitor client by default */ - return 1; + return true; } -int -nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int -nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; argp->lock.fl.fl_type = F_UNLCK; - return 1; + return true; } -int -nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_decode_stats(xdr, &resp->status)) - return 0; + return false; - return 1; + return 
true; } -int -nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_reboot *argp = rqstp->rq_argp; + __be32 *p; u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) - return 0; + return false; if (len > SM_MAXSTRLEN) - return 0; + return false; p = xdr_inline_decode(xdr, len); if (!p) - return 0; + return false; argp->len = len; argp->mon = (char *)p; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; p = xdr_inline_decode(xdr, SM_PRIV_SIZE); if (!p) - return 0; + return false; memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - return 1; + return true; } -int -nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; @@ -284,35 +278,34 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) lock->svid = ~(u32)0; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return 0; + return false; if (!svcxdr_decode_owner(xdr, &lock->oh)) - return 0; + return false; /* XXX: Range checks are missing in the original code */ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return 0; + return false; - return 1; + return true; } -int -nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; 
+ return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; - return 1; + return true; } @@ -320,45 +313,42 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p) * Encode Reply results */ -int -nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_testrply(xdr, resp); } -int -nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_stats(xdr, resp->status); } -int -nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_encode_stats(xdr, resp->status)) - return 0; + return false; /* sequence */ if (xdr_stream_encode_u32(xdr, 0) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index b303ecd74f..5fcbf30cd2 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -140,136 +140,131 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) * Decode Call arguments */ -int -nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 
*p) +bool +nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int -nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; argp->monitor = 1; /* monitor client by default */ - return 1; + return true; } -int -nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + + return true; } -int 
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; argp->lock.fl.fl_type = F_UNLCK; - return 1; + return true; } -int -nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_decode_stats(xdr, &resp->status)) - return 0; + return false; - return 1; + return true; } -int -nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_reboot *argp = rqstp->rq_argp; + __be32 *p; u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) - return 0; + return false; if (len > SM_MAXSTRLEN) - return 0; + return false; p = xdr_inline_decode(xdr, len); if (!p) - return 0; + return false; argp->len = len; argp->mon = (char *)p; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; p = xdr_inline_decode(xdr, SM_PRIV_SIZE); if (!p) - return 0; + return false; memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - return 1; + return true; } -int -nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; @@ -278,35 +273,34 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) lock->svid = ~(u32)0; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - 
return 0; + return false; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return 0; + return false; if (!svcxdr_decode_owner(xdr, &lock->oh)) - return 0; + return false; /* XXX: Range checks are missing in the original code */ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return 0; + return false; - return 1; + return true; } -int -nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; - return 1; + return true; } @@ -314,45 +308,42 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p) * Encode Reply results */ -int -nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_testrply(xdr, resp); } -int -nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_stats(xdr, resp->status); } -int -nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_shareres(struct svc_rqst *rqstp, 
struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_encode_stats(xdr, resp->status)) - return 0; + return false; /* sequence */ if (xdr_stream_encode_u32(xdr, 0) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/locks.c b/fs/locks.c index 4899a4666f..77781b71bc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -251,7 +251,7 @@ locks_get_lock_context(struct inode *inode, int type) struct file_lock_context *ctx; /* paired with cmpxchg() below */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; @@ -270,7 +270,7 @@ locks_get_lock_context(struct inode *inode, int type) */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); @@ -323,7 +323,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, void locks_free_lock_context(struct inode *inode) { - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); @@ -376,6 +376,34 @@ void locks_release_private(struct file_lock *fl) } EXPORT_SYMBOL_GPL(locks_release_private); +/** + * locks_owner_has_blockers - Check for blocking lock requests + * @flctx: file lock context + * @owner: lock owner + * + * Return values: + * %true: @owner has at least one blocker + * %false: @owner has no blockers + */ +bool locks_owner_has_blockers(struct file_lock_context *flctx, + fl_owner_t owner) +{ + struct file_lock *fl; + + spin_lock(&flctx->flc_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { + if (fl->fl_owner != owner) + continue; + if (!list_empty(&fl->fl_blocked_requests)) { + 
spin_unlock(&flctx->flc_lock); + return true; + } + } + spin_unlock(&flctx->flc_lock); + return false; +} +EXPORT_SYMBOL_GPL(locks_owner_has_blockers); + /* Free a lock which is not in use. */ void locks_free_lock(struct file_lock *fl) { @@ -954,19 +982,32 @@ posix_test_lock(struct file *filp, struct file_lock *fl) struct file_lock *cfl; struct file_lock_context *ctx; struct inode *inode = locks_inode(filp); + void *owner; + void (*func)(void); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; return; } +retry: spin_lock(&ctx->flc_lock); list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { - if (posix_locks_conflict(fl, cfl)) { - locks_copy_conflock(fl, cfl); - goto out; + if (!posix_locks_conflict(fl, cfl)) + continue; + if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable + && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) { + owner = cfl->fl_lmops->lm_mod_owner; + func = cfl->fl_lmops->lm_expire_lock; + __module_get(owner); + spin_unlock(&ctx->flc_lock); + (*func)(); + module_put(owner); + goto retry; } + locks_copy_conflock(fl, cfl); + goto out; } fl->fl_type = F_UNLCK; out: @@ -1140,6 +1181,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, int error; bool added = false; LIST_HEAD(dispose); + void *owner; + void (*func)(void); ctx = locks_get_lock_context(inode, request->fl_type); if (!ctx) @@ -1158,6 +1201,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, new_fl2 = locks_alloc_lock(); } +retry: percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); /* @@ -1169,6 +1213,17 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (!posix_locks_conflict(request, fl)) continue; + if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable + && (*fl->fl_lmops->lm_lock_expirable)(fl)) { + owner = fl->fl_lmops->lm_mod_owner; + func = 
fl->fl_lmops->lm_expire_lock; + __module_get(owner); + spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); + (*func)(); + module_put(owner); + goto retry; + } if (conflock) locks_copy_conflock(conflock, fl); error = -EAGAIN; @@ -1522,7 +1577,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) new_fl->fl_flags = type; /* typically we will check that ctx is non-NULL before calling */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; @@ -1627,7 +1682,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) struct file_lock_context *ctx; struct file_lock *fl; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); fl = list_first_entry_or_null(&ctx->flc_lease, @@ -1673,7 +1728,7 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1862,7 +1917,7 @@ static int generic_delete_lease(struct file *filp, void *owner) struct file_lock_context *ctx; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; @@ -2596,7 +2651,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. 
*/ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2669,7 +2724,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); + ctx = locks_inode_context(locks_inode(filp)); if (!ctx) return; @@ -2716,7 +2771,7 @@ bool vfs_inode_has_locks(struct inode *inode) struct file_lock_context *ctx; bool ret; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return false; @@ -2907,7 +2962,7 @@ void show_fd_locks(struct seq_file *f, struct file_lock_context *ctx; int id = 0; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 86d856de13..8fe143cad4 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -45,18 +44,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) int ret; struct nfs_net *nn = net_generic(net, nfs_net_id); - ret = svc_create_xprt(serv, "tcp", net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, - cred); + ret = svc_xprt_create(serv, "tcp", net, PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", nn->nfs_callback_tcpport, PF_INET, net->ns.inum); - ret = svc_create_xprt(serv, "tcp", net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, - cred); + ret = svc_xprt_create(serv, "tcp", net, PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", @@ -81,9 +80,6 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - 
flush_signals(current); /* * Listen for a request on the socket */ @@ -92,8 +88,8 @@ nfs4_callback_svc(void *vrqstp) continue; svc_process(rqstp); } + svc_exit_thread(rqstp); - module_put_and_exit(0); return 0; } @@ -113,11 +109,7 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - flush_signals(current); - - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { req = list_first_entry(&serv->sv_cb_list, @@ -132,12 +124,12 @@ nfs41_callback_svc(void *vrqstp) } else { spin_unlock_bh(&serv->sv_cb_lock); if (!kthread_should_stop()) - schedule(); + freezable_schedule(); finish_wait(&serv->sv_cb_waitq, &wq); } } + svc_exit_thread(rqstp); - module_put_and_exit(0); return 0; } @@ -169,12 +161,12 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS) nrservs = NFS4_MIN_NR_CALLBACK_THREADS; - if (serv->sv_nrthreads-1 == nrservs) + if (serv->sv_nrthreads == nrservs) return 0; - ret = serv->sv_ops->svo_setup(serv, NULL, nrservs); + ret = svc_set_num_threads(serv, NULL, nrservs); if (ret) { - serv->sv_ops->svo_setup(serv, NULL, 0); + svc_set_num_threads(serv, NULL, 0); return ret; } dprintk("nfs_callback_up: service started\n"); @@ -189,7 +181,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc return; dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); - svc_shutdown_net(serv, net); + svc_xprt_destroy_all(serv, net); } static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, @@ -232,59 +224,17 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, return ret; } -static const struct svc_serv_ops nfs40_cb_sv_ops = { - .svo_function = nfs4_callback_svc, - .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = 
svc_set_num_threads_sync, - .svo_module = THIS_MODULE, -}; -#if defined(CONFIG_NFS_V4_1) -static const struct svc_serv_ops nfs41_cb_sv_ops = { - .svo_function = nfs41_callback_svc, - .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads_sync, - .svo_module = THIS_MODULE, -}; - -static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { - [0] = &nfs40_cb_sv_ops, - [1] = &nfs41_cb_sv_ops, -}; -#else -static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { - [0] = &nfs40_cb_sv_ops, - [1] = NULL, -}; -#endif - static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - const struct svc_serv_ops *sv_ops; + int (*threadfn)(void *data); struct svc_serv *serv; /* * Check whether we're already up and running. */ - if (cb_info->serv) { - /* - * Note: increase service usage, because later in case of error - * svc_destroy() will be called. - */ - svc_get(cb_info->serv); - return cb_info->serv; - } - - switch (minorversion) { - case 0: - sv_ops = nfs4_cb_sv_ops[0]; - break; - default: - sv_ops = nfs4_cb_sv_ops[1]; - } - - if (sv_ops == NULL) - return ERR_PTR(-ENOTSUPP); + if (cb_info->serv) + return svc_get(cb_info->serv); /* * Sanity check: if there's no task, @@ -294,7 +244,16 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); + threadfn = nfs4_callback_svc; +#if defined(CONFIG_NFS_V4_1) + if (minorversion) + threadfn = nfs41_callback_svc; +#else + if (minorversion) + return ERR_PTR(-ENOTSUPP); +#endif + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, + threadfn); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); @@ -335,16 +294,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_start; 
cb_info->users++; - /* - * svc_create creates the svc_serv with sv_nrthreads == 1, and then - * svc_prepare_thread increments that. So we need to call svc_destroy - * on both success and failure so that the refcount is 1 when the - * thread exits. - */ err_net: if (!cb_info->users) cb_info->serv = NULL; - svc_destroy(serv); + svc_put(serv); err_create: mutex_unlock(&nfs_callback_mutex); return ret; @@ -369,8 +322,8 @@ void nfs_callback_down(int minorversion, struct net *net) cb_info->users--; if (cb_info->users == 0) { svc_get(serv); - serv->sv_ops->svo_setup(serv, NULL, 0); - svc_destroy(serv); + svc_set_num_threads(serv, NULL, 0); + svc_put(serv); dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index ea17085ef8..d0cccddb7d 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp) * svc_process_common() looks for an XDR encoder to know when * not to drop a Reply. 
*/ -static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p) +static bool nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return xdr_ressize_check(rqstp, p); + return true; } static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, @@ -1065,6 +1065,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = { .pc_func = nfs4_callback_compound, .pc_encode = nfs4_encode_void, .pc_argsize = 256, + .pc_argzero = 256, .pc_ressize = 256, .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, .pc_name = "COMPOUND", diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 090b16890e..9e3a3570ef 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -73,7 +73,6 @@ const struct rpc_program nfs_program = { .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, - .stats = &nfs_rpcstat, .pipe_dir_name = NFS_PIPE_DIRNAME, }; @@ -500,6 +499,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, const struct nfs_client_initdata *cl_init, rpc_authflavor_t flavor) { + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { .net = clp->cl_net, @@ -511,6 +511,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .servername = clp->cl_hostname, .nodename = cl_init->nodename, .program = &nfs_program, + .stats = &nn->rpcstats, .version = clp->rpc_ops->version, .authflavor = flavor, .cred = cl_init->cred, @@ -1129,6 +1130,8 @@ void nfs_clients_init(struct net *net) #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); + memset(&nn->rpcstats, 0, sizeof(nn->rpcstats)); + nn->rpcstats.program = &nfs_program; nfs_netns_sysfs_setup(nn, net); } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bbe2a5cc49..3185899676 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -678,10 +678,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) LIST_HEAD(mds_list); nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_commit_begin(cinfo.mds); 
nfs_scan_commit(dreq->inode, &mds_list, &cinfo); res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); - if (res < 0) /* res == -ENOMEM */ - nfs_direct_write_reschedule(dreq); + if (res < 0) { /* res == -ENOMEM */ + spin_lock(&dreq->lock); + if (dreq->flags == 0) + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + } + if (nfs_commit_end(cinfo.mds)) + nfs_direct_write_complete(dreq); } static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index d772c20bbf..eafa9d7b09 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -178,7 +178,10 @@ const struct export_operations nfs_export_ops = { .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, .fetch_iversion = nfs_fetch_iversion, - .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| - EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| - EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS, + .flags = EXPORT_OP_NOWCC | + EXPORT_OP_NOSUBTREECHK | + EXPORT_OP_CLOSE_BEFORE_UNLINK | + EXPORT_OP_REMOTE_FS | + EXPORT_OP_NOATOMIC_ATTR | + EXPORT_OP_FLUSH_ON_CLOSE, }; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d8f01d222c..48ade92d4c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2360,12 +2360,21 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + nfs_clients_init(net); + + if (!rpc_proc_register(net, &nn->rpcstats)) { + nfs_clients_exit(net); + return -ENOMEM; + } + return nfs_fs_proc_net_init(net); } static void nfs_net_exit(struct net *net) { + rpc_proc_unregister(net, "nfs"); nfs_fs_proc_net_exit(net); nfs_clients_exit(net); } @@ -2424,15 +2433,12 @@ static int __init init_nfs_fs(void) if (err) goto out1; - rpc_proc_register(&init_net, &nfs_rpcstat); - err = register_nfs_fs(); if (err) goto out0; return 0; out0: - rpc_proc_unregister(&init_net, "nfs"); nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2465,7 +2471,6 @@ 
static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); - rpc_proc_unregister(&init_net, "nfs"); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2ceb4b98ec..f82264fcbb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -428,8 +428,6 @@ int nfs_try_get_tree(struct fs_context *); int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); -extern struct rpc_stat nfs_rpcstat; - extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern bool nfs_sb_active(struct super_block *sb); @@ -655,9 +653,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) if ((bsize & (bsize - 1)) || nrbitsp) { unsigned char nrbits; - for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--) ; - bsize = 1 << nrbits; + bsize = 1UL << nrbits; if (nrbitsp) *nrbitsp = nrbits; } diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index c8374f74dc..a68b21603e 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -9,6 +9,7 @@ #include #include #include +#include struct bl_dev_msg { int32_t status; @@ -34,6 +35,7 @@ struct nfs_net { struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; + struct rpc_stat rpcstats; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_nfsfs; #endif diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 925ad7dbe6..167f2cc3c3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5421,7 +5421,7 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, struct rpc_message *msg = &task->tk_msg; if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && - server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { + task->tk_status == -ENOTSUPP) { server->caps &= ~NFS_CAP_READ_PLUS; msg->rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_READ]; rpc_restart_call_prepare(task); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 258e6b1672..d452fa85a5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2102,6 +2102,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred { struct nfs_client *clp = server->nfs_client; struct nfs4_fs_locations *locations = NULL; + struct nfs_fattr *fattr; struct inode *inode; struct page *page; int status, result; @@ -2111,19 +2112,16 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred (unsigned long long)server->fsid.minor, clp->cl_hostname); - result = 0; page = alloc_page(GFP_KERNEL); locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (page == NULL || locations == NULL) { - dprintk("<-- %s: no memory\n", __func__); - goto out; - } - locations->fattr = nfs_alloc_fattr(); - if (locations->fattr == NULL) { + fattr = nfs_alloc_fattr(); + if (page == NULL || locations == NULL || fattr == NULL) { dprintk("<-- %s: no memory\n", __func__); + result = 0; goto out; } + locations->fattr = fattr; inode = d_inode(server->super->s_root); result = nfs4_proc_get_locations(server, NFS_FH(inode), locations, page, cred); @@ -2766,6 +2764,6 @@ static int nfs4_run_state_manager(void *ptr) goto again; nfs_put_client(clp); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index bcd18e96b4..8565fa654f 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -9,322 +9,10 @@ #define _TRACE_NFS4_H #include +#include -TRACE_DEFINE_ENUM(EPERM); -TRACE_DEFINE_ENUM(ENOENT); -TRACE_DEFINE_ENUM(EIO); -TRACE_DEFINE_ENUM(ENXIO); -TRACE_DEFINE_ENUM(EACCES); -TRACE_DEFINE_ENUM(EEXIST); -TRACE_DEFINE_ENUM(EXDEV); -TRACE_DEFINE_ENUM(ENOTDIR); -TRACE_DEFINE_ENUM(EISDIR); -TRACE_DEFINE_ENUM(EFBIG); -TRACE_DEFINE_ENUM(ENOSPC); -TRACE_DEFINE_ENUM(EROFS); -TRACE_DEFINE_ENUM(EMLINK); 
-TRACE_DEFINE_ENUM(ENAMETOOLONG); -TRACE_DEFINE_ENUM(ENOTEMPTY); -TRACE_DEFINE_ENUM(EDQUOT); -TRACE_DEFINE_ENUM(ESTALE); -TRACE_DEFINE_ENUM(EBADHANDLE); -TRACE_DEFINE_ENUM(EBADCOOKIE); -TRACE_DEFINE_ENUM(ENOTSUPP); -TRACE_DEFINE_ENUM(ETOOSMALL); -TRACE_DEFINE_ENUM(EREMOTEIO); -TRACE_DEFINE_ENUM(EBADTYPE); -TRACE_DEFINE_ENUM(EAGAIN); -TRACE_DEFINE_ENUM(ELOOP); -TRACE_DEFINE_ENUM(EOPNOTSUPP); -TRACE_DEFINE_ENUM(EDEADLK); -TRACE_DEFINE_ENUM(ENOMEM); -TRACE_DEFINE_ENUM(EKEYEXPIRED); -TRACE_DEFINE_ENUM(ETIMEDOUT); -TRACE_DEFINE_ENUM(ERESTARTSYS); -TRACE_DEFINE_ENUM(ECONNREFUSED); -TRACE_DEFINE_ENUM(ECONNRESET); -TRACE_DEFINE_ENUM(ENETUNREACH); -TRACE_DEFINE_ENUM(EHOSTUNREACH); -TRACE_DEFINE_ENUM(EHOSTDOWN); -TRACE_DEFINE_ENUM(EPIPE); -TRACE_DEFINE_ENUM(EPFNOSUPPORT); -TRACE_DEFINE_ENUM(EPROTONOSUPPORT); - -TRACE_DEFINE_ENUM(NFS4_OK); -TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); -TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); -TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); -TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); -TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); -TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); -TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); -TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); -TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); -TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); -TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); -TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); -TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); -TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); -TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); -TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); -TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); -TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); -TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); 
-TRACE_DEFINE_ENUM(NFS4ERR_DELAY); -TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); -TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); -TRACE_DEFINE_ENUM(NFS4ERR_DENIED); -TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); -TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); -TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_EXIST); -TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); -TRACE_DEFINE_ENUM(NFS4ERR_FBIG); -TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); -TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); -TRACE_DEFINE_ENUM(NFS4ERR_GRACE); -TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_INVAL); -TRACE_DEFINE_ENUM(NFS4ERR_IO); -TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); -TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); -TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); -TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); -TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); -TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); -TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); -TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); -TRACE_DEFINE_ENUM(NFS4ERR_MLINK); -TRACE_DEFINE_ENUM(NFS4ERR_MOVED); -TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); -TRACE_DEFINE_ENUM(NFS4ERR_NOENT); -TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); -TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); -TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); -TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); -TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); -TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); -TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); -TRACE_DEFINE_ENUM(NFS4ERR_NXIO); -TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); -TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); -TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); -TRACE_DEFINE_ENUM(NFS4ERR_PERM); -TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); -TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); -TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); -TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); 
-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); -TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); -TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); -TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); -TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); -TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_ROFS); -TRACE_DEFINE_ENUM(NFS4ERR_SAME); -TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); -TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); -TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); -TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); -TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); -TRACE_DEFINE_ENUM(NFS4ERR_STALE); -TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); -TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); -TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); -TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); -TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); -TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); -TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); -TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); -TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); -TRACE_DEFINE_ENUM(NFS4ERR_XDEV); - -TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); -TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); - -#define show_nfsv4_errors(error) \ - __print_symbolic(error, \ - { NFS4_OK, "OK" }, \ - /* Mapped by nfs4_stat_to_errno() */ \ - { EPERM, "EPERM" }, \ - { ENOENT, "ENOENT" }, \ - { EIO, "EIO" }, \ - { ENXIO, "ENXIO" }, \ - { EACCES, "EACCES" }, \ - { EEXIST, "EEXIST" }, \ - { EXDEV, "EXDEV" }, \ - { ENOTDIR, "ENOTDIR" }, \ - { EISDIR, "EISDIR" }, \ - { EFBIG, "EFBIG" }, \ - { ENOSPC, "ENOSPC" }, \ - { EROFS, "EROFS" }, \ - { EMLINK, "EMLINK" }, \ - { ENAMETOOLONG, "ENAMETOOLONG" }, \ - { ENOTEMPTY, "ENOTEMPTY" }, \ - { EDQUOT, "EDQUOT" }, \ - { ESTALE, "ESTALE" }, \ - { EBADHANDLE, "EBADHANDLE" }, \ - { EBADCOOKIE, "EBADCOOKIE" }, \ - { ENOTSUPP, "ENOTSUPP" }, \ - { ETOOSMALL, "ETOOSMALL" }, \ - { EREMOTEIO, "EREMOTEIO" }, \ - { EBADTYPE, "EBADTYPE" }, \ - { EAGAIN, "EAGAIN" }, \ - { ELOOP, "ELOOP" }, \ - { EOPNOTSUPP, "EOPNOTSUPP" }, \ - { EDEADLK, "EDEADLK" }, \ - /* 
RPC errors */ \ - { ENOMEM, "ENOMEM" }, \ - { EKEYEXPIRED, "EKEYEXPIRED" }, \ - { ETIMEDOUT, "ETIMEDOUT" }, \ - { ERESTARTSYS, "ERESTARTSYS" }, \ - { ECONNREFUSED, "ECONNREFUSED" }, \ - { ECONNRESET, "ECONNRESET" }, \ - { ENETUNREACH, "ENETUNREACH" }, \ - { EHOSTUNREACH, "EHOSTUNREACH" }, \ - { EHOSTDOWN, "EHOSTDOWN" }, \ - { EPIPE, "EPIPE" }, \ - { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ - { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ - /* NFSv4 native errors */ \ - { NFS4ERR_ACCESS, "ACCESS" }, \ - { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ - { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ - { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ - { NFS4ERR_BADCHAR, "BADCHAR" }, \ - { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ - { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ - { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ - { NFS4ERR_BADLABEL, "BADLABEL" }, \ - { NFS4ERR_BADNAME, "BADNAME" }, \ - { NFS4ERR_BADOWNER, "BADOWNER" }, \ - { NFS4ERR_BADSESSION, "BADSESSION" }, \ - { NFS4ERR_BADSLOT, "BADSLOT" }, \ - { NFS4ERR_BADTYPE, "BADTYPE" }, \ - { NFS4ERR_BADXDR, "BADXDR" }, \ - { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ - { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ - { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ - { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ - { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ - { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ - { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ - { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ - { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ - { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ - { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \ - "CONN_NOT_BOUND_TO_SESSION" }, \ - { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ - { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ - { NFS4ERR_DELAY, "DELAY" }, \ - { NFS4ERR_DELEG_ALREADY_WANTED, \ - "DELEG_ALREADY_WANTED" }, \ - { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ - { NFS4ERR_DENIED, "DENIED" }, \ - { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ - { NFS4ERR_DQUOT, "DQUOT" }, \ - { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ - { 
NFS4ERR_EXIST, "EXIST" }, \ - { NFS4ERR_EXPIRED, "EXPIRED" }, \ - { NFS4ERR_FBIG, "FBIG" }, \ - { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ - { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ - { NFS4ERR_GRACE, "GRACE" }, \ - { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ - { NFS4ERR_INVAL, "INVAL" }, \ - { NFS4ERR_IO, "IO" }, \ - { NFS4ERR_ISDIR, "ISDIR" }, \ - { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ - { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ - { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ - { NFS4ERR_LOCKED, "LOCKED" }, \ - { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ - { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ - { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ - { NFS4ERR_MLINK, "MLINK" }, \ - { NFS4ERR_MOVED, "MOVED" }, \ - { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ - { NFS4ERR_NOENT, "NOENT" }, \ - { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ - { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ - { NFS4ERR_NOSPC, "NOSPC" }, \ - { NFS4ERR_NOTDIR, "NOTDIR" }, \ - { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ - { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ - { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ - { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ - { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ - { NFS4ERR_NXIO, "NXIO" }, \ - { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ - { NFS4ERR_OPENMODE, "OPENMODE" }, \ - { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ - { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ - { NFS4ERR_PERM, "PERM" }, \ - { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ - { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ - { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ - { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ - { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ - { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ - { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ - { NFS4ERR_REP_TOO_BIG_TO_CACHE, \ - "REP_TOO_BIG_TO_CACHE" }, \ - { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ - { NFS4ERR_RESOURCE, "RESOURCE" }, \ - { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ - { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ - 
{ NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ - { NFS4ERR_ROFS, "ROFS" }, \ - { NFS4ERR_SAME, "SAME" }, \ - { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ - { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ - { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ - { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ - { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ - { NFS4ERR_STALE, "STALE" }, \ - { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ - { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ - { NFS4ERR_SYMLINK, "SYMLINK" }, \ - { NFS4ERR_TOOSMALL, "TOOSMALL" }, \ - { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ - { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ - { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ - { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ - { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ - { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ - { NFS4ERR_XDEV, "XDEV" }, \ - /* ***** Internal to Linux NFS client ***** */ \ - { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ - { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) - -#define show_open_flags(flags) \ - __print_flags(flags, "|", \ - { O_CREAT, "O_CREAT" }, \ - { O_EXCL, "O_EXCL" }, \ - { O_TRUNC, "O_TRUNC" }, \ - { O_DIRECT, "O_DIRECT" }) - -#define show_fmode_flags(mode) \ - __print_flags(mode, "|", \ - { ((__force unsigned long)FMODE_READ), "READ" }, \ - { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ - { ((__force unsigned long)FMODE_EXEC), "EXEC" }) +#include +#include #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ @@ -365,7 +53,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_printk( "error=%ld (%s) dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __get_str(dstaddr) ) ); @@ -389,29 +77,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); -#define show_nfs4_sequence_status_flags(status) \ - __print_flags((unsigned long)status, "|", \ - { 
SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ - { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \ - "CB_GSS_CONTEXTS_EXPIRING" }, \ - { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \ - "CB_GSS_CONTEXTS_EXPIRED" }, \ - { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \ - "EXPIRED_ALL_STATE_REVOKED" }, \ - { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \ - "EXPIRED_SOME_STATE_REVOKED" }, \ - { SEQ4_STATUS_ADMIN_STATE_REVOKED, \ - "ADMIN_STATE_REVOKED" }, \ - { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, \ - "RECALLABLE_STATE_REVOKED" }, \ - { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ - { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \ - "RESTART_RECLAIM_NEEDED" }, \ - { SEQ4_STATUS_CB_PATH_DOWN_SESSION, \ - "CB_PATH_DOWN_SESSION" }, \ - { SEQ4_STATUS_BACKCHANNEL_FAULT, \ - "BACKCHANNEL_FAULT" }) - TRACE_EVENT(nfs4_sequence_done, TP_PROTO( const struct nfs4_session *session, @@ -425,7 +90,7 @@ TRACE_EVENT(nfs4_sequence_done, __field(unsigned int, seq_nr) __field(unsigned int, highest_slotid) __field(unsigned int, target_highest_slotid) - __field(unsigned int, status_flags) + __field(unsigned long, status_flags) __field(unsigned long, error) ), @@ -444,16 +109,16 @@ TRACE_EVENT(nfs4_sequence_done, TP_printk( "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u target_highest_slotid=%u " - "status_flags=%u (%s)", + "status_flags=0x%lx (%s)", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, __entry->highest_slotid, __entry->target_highest_slotid, __entry->status_flags, - show_nfs4_sequence_status_flags(__entry->status_flags) + show_nfs4_seq4_status(__entry->status_flags) ) ); @@ -490,7 +155,7 @@ TRACE_EVENT(nfs4_cb_sequence, "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, @@ -527,7 +192,7 @@ TRACE_EVENT(nfs4_cb_seqid_err, "error=%ld 
(%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, @@ -663,7 +328,7 @@ TRACE_EVENT(nfs4_state_mgr_failed, "hostname=%s clp state=%s error=%ld (%s) section=%s", __get_str(hostname), show_nfs4_clp_state(__entry->state), -__entry->error, - show_nfsv4_errors(__entry->error), __get_str(section) + show_nfs4_status(__entry->error), __get_str(section) ) ) @@ -696,8 +361,8 @@ TRACE_EVENT(nfs4_xdr_bad_operation, __entry->expected = expected; ), - TP_printk( - "task:%u@%d xid=0x%08x operation=%u, expected=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x operation=%u, expected=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->op, __entry->expected ) @@ -731,10 +396,10 @@ DECLARE_EVENT_CLASS(nfs4_xdr_event, __entry->error = error; ), - TP_printk( - "task:%u@%d xid=0x%08x error=%ld (%s) operation=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x error=%ld (%s) operation=%u", __entry->task_id, __entry->client_id, __entry->xid, - -__entry->error, show_nfsv4_errors(__entry->error), + -__entry->error, show_nfs4_status(__entry->error), __entry->op ) ); @@ -795,8 +460,8 @@ DECLARE_EVENT_CLASS(nfs4_open_event, TP_STRUCT__entry( __field(unsigned long, error) - __field(unsigned int, flags) - __field(unsigned int, fmode) + __field(unsigned long, flags) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -814,7 +479,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, __entry->error = -error; __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __entry->dev = ctx->dentry->d_sb->s_dev; if (!IS_ERR_OR_NULL(state)) { inode = state->inode; @@ -844,15 +509,15 @@ DECLARE_EVENT_CLASS(nfs4_open_event, ), TP_printk( - "error=%ld (%s) flags=%d (%s) fmode=%s " + "error=%ld (%s) flags=%lu (%s) fmode=%s 
" "fileid=%02x:%02x:%llu fhandle=0x%08x " "name=%02x:%02x:%llu/%s stateid=%d:0x%08x " "openstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -906,7 +571,7 @@ TRACE_EVENT(nfs4_cached_open, TP_printk( "fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x stateid=%d:0x%08x", - __entry->fmode ? show_fmode_flags(__entry->fmode) : + __entry->fmode ? show_fs_fmode_flags(__entry->fmode) : "closed", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -953,8 +618,8 @@ TRACE_EVENT(nfs4_close, "error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x openstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), - __entry->fmode ? show_fmode_flags(__entry->fmode) : + show_nfs4_status(__entry->error), + __entry->fmode ? 
show_fs_fmode_flags(__entry->fmode) : "closed", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -963,24 +628,6 @@ TRACE_EVENT(nfs4_close, ) ); -TRACE_DEFINE_ENUM(F_GETLK); -TRACE_DEFINE_ENUM(F_SETLK); -TRACE_DEFINE_ENUM(F_SETLKW); -TRACE_DEFINE_ENUM(F_RDLCK); -TRACE_DEFINE_ENUM(F_WRLCK); -TRACE_DEFINE_ENUM(F_UNLCK); - -#define show_lock_cmd(type) \ - __print_symbolic((int)type, \ - { F_GETLK, "GETLK" }, \ - { F_SETLK, "SETLK" }, \ - { F_SETLKW, "SETLKW" }) -#define show_lock_type(type) \ - __print_symbolic((int)type, \ - { F_RDLCK, "RDLCK" }, \ - { F_WRLCK, "WRLCK" }, \ - { F_UNLCK, "UNLCK" }) - DECLARE_EVENT_CLASS(nfs4_lock_event, TP_PROTO( const struct file_lock *request, @@ -993,8 +640,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_STRUCT__entry( __field(unsigned long, error) - __field(int, cmd) - __field(char, type) + __field(unsigned long, cmd) + __field(unsigned long, type) __field(loff_t, start) __field(loff_t, end) __field(dev_t, dev) @@ -1026,9 +673,9 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), - show_lock_cmd(__entry->cmd), - show_lock_type(__entry->type), + show_nfs4_status(__entry->error), + show_fs_fcntl_cmd(__entry->cmd), + show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, (long long)__entry->end, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1063,8 +710,8 @@ TRACE_EVENT(nfs4_set_lock, TP_STRUCT__entry( __field(unsigned long, error) - __field(int, cmd) - __field(char, type) + __field(unsigned long, cmd) + __field(unsigned long, type) __field(loff_t, start) __field(loff_t, end) __field(dev_t, dev) @@ -1102,9 +749,9 @@ TRACE_EVENT(nfs4_set_lock, "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x lockstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), - show_lock_cmd(__entry->cmd), - show_lock_type(__entry->type), + show_nfs4_status(__entry->error), + 
show_fs_fcntl_cmd(__entry->cmd), + show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, (long long)__entry->end, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1221,7 +868,7 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event, TP_printk( "fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x", - show_fmode_flags(__entry->fmode), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle @@ -1268,7 +915,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, "error=%ld (%s) dev=%02x:%02x fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fhandle, __entry->stateid_seq, __entry->stateid_hash @@ -1311,7 +958,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1358,7 +1005,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -1405,7 +1052,7 @@ TRACE_EVENT(nfs4_lookupp, TP_printk( "error=%ld (%s) inode=%02x:%02x:%llu", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->ino ) @@ -1444,7 +1091,7 @@ TRACE_EVENT(nfs4_rename, "error=%ld (%s) oldname=%02x:%02x:%llu/%s " "newname=%02x:%02x:%llu/%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->olddir, __get_str(oldname), @@ -1479,7 
+1126,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle @@ -1537,7 +1184,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1590,7 +1237,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "valid=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1646,7 +1293,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1707,7 +1354,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1756,7 +1403,7 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_printk( "error=%ld (%s) id=%u name=%s", - -__entry->error, show_nfsv4_errors(__entry->error), + -__entry->error, show_nfs4_status(__entry->error), __entry->id, __get_str(name) ) @@ -1834,7 +1481,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, "offset=%lld count=%u res=%u stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", 
-__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1908,7 +1555,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, "offset=%lld count=%u res=%u stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1972,7 +1619,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1992,16 +1639,6 @@ DEFINE_NFS4_COMMIT_EVENT(nfs4_commit); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds); -TRACE_DEFINE_ENUM(IOMODE_READ); -TRACE_DEFINE_ENUM(IOMODE_RW); -TRACE_DEFINE_ENUM(IOMODE_ANY); - -#define show_pnfs_iomode(iomode) \ - __print_symbolic(iomode, \ - { IOMODE_READ, "READ" }, \ - { IOMODE_RW, "RW" }, \ - { IOMODE_ANY, "ANY" }) - TRACE_EVENT(nfs4_layoutget, TP_PROTO( const struct nfs_open_context *ctx, @@ -2057,11 +1694,11 @@ TRACE_EVENT(nfs4_layoutget, "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->offset, (unsigned long long)__entry->count, __entry->stateid_seq, __entry->stateid_hash, @@ -2155,7 +1792,7 @@ TRACE_EVENT(pnfs_update_layout, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - 
show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->pos, (unsigned long long)__entry->count, __entry->layoutstateid_seq, __entry->layoutstateid_hash, @@ -2209,7 +1846,7 @@ DECLARE_EVENT_CLASS(pnfs_layout_event, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->pos, (unsigned long long)__entry->count, __entry->layoutstateid_seq, __entry->layoutstateid_hash, @@ -2354,7 +1991,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -2410,7 +2047,7 @@ TRACE_EVENT(ff_layout_commit_error, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%llu count=%u dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8a224871be..6804ca2efb 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,45 +11,9 @@ #include #include -TRACE_DEFINE_ENUM(DT_UNKNOWN); -TRACE_DEFINE_ENUM(DT_FIFO); -TRACE_DEFINE_ENUM(DT_CHR); -TRACE_DEFINE_ENUM(DT_DIR); -TRACE_DEFINE_ENUM(DT_BLK); -TRACE_DEFINE_ENUM(DT_REG); -TRACE_DEFINE_ENUM(DT_LNK); -TRACE_DEFINE_ENUM(DT_SOCK); -TRACE_DEFINE_ENUM(DT_WHT); - -#define nfs_show_file_type(ftype) \ - __print_symbolic(ftype, \ - { DT_UNKNOWN, "UNKNOWN" }, \ - { DT_FIFO, "FIFO" }, \ - { DT_CHR, "CHR" }, \ - { DT_DIR, "DIR" }, \ - { DT_BLK, "BLK" }, \ - { DT_REG, "REG" }, \ - { DT_LNK, "LNK" }, \ - { DT_SOCK, "SOCK" }, \ - { DT_WHT, "WHT" }) - 
-TRACE_DEFINE_ENUM(NFS_INO_INVALID_DATA); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ATIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACCESS); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACL); -TRACE_DEFINE_ENUM(NFS_INO_REVAL_PAGECACHE); -TRACE_DEFINE_ENUM(NFS_INO_REVAL_FORCED); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_LABEL); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_CHANGE); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); -TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE); +#include +#include +#include #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ @@ -71,17 +35,6 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE); { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ { NFS_INO_INVALID_MODE, "INVALID_MODE" }) -TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); -TRACE_DEFINE_ENUM(NFS_INO_STALE); -TRACE_DEFINE_ENUM(NFS_INO_ACL_LRU_SET); -TRACE_DEFINE_ENUM(NFS_INO_INVALIDATING); -TRACE_DEFINE_ENUM(NFS_INO_FSCACHE); -TRACE_DEFINE_ENUM(NFS_INO_FSCACHE_LOCK); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMIT); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMITTING); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTSTATS); -TRACE_DEFINE_ENUM(NFS_INO_ODIRECT); - #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \ @@ -163,12 +116,12 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, - nfs_show_file_type(__entry->type), + show_fs_dirent_type(__entry->type), (unsigned 
long long)__entry->version, (long long)__entry->size, __entry->cache_validity, @@ -254,12 +207,12 @@ TRACE_EVENT(nfs_access_exit, "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " "mask=0x%x permitted=0x%x", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, - nfs_show_file_type(__entry->type), + show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, @@ -270,34 +223,6 @@ TRACE_EVENT(nfs_access_exit, ) ); -TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); -TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); -TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT); -TRACE_DEFINE_ENUM(LOOKUP_PARENT); -TRACE_DEFINE_ENUM(LOOKUP_REVAL); -TRACE_DEFINE_ENUM(LOOKUP_RCU); -TRACE_DEFINE_ENUM(LOOKUP_OPEN); -TRACE_DEFINE_ENUM(LOOKUP_CREATE); -TRACE_DEFINE_ENUM(LOOKUP_EXCL); -TRACE_DEFINE_ENUM(LOOKUP_RENAME_TARGET); -TRACE_DEFINE_ENUM(LOOKUP_EMPTY); -TRACE_DEFINE_ENUM(LOOKUP_DOWN); - -#define show_lookup_flags(flags) \ - __print_flags(flags, "|", \ - { LOOKUP_FOLLOW, "FOLLOW" }, \ - { LOOKUP_DIRECTORY, "DIRECTORY" }, \ - { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ - { LOOKUP_PARENT, "PARENT" }, \ - { LOOKUP_REVAL, "REVAL" }, \ - { LOOKUP_RCU, "RCU" }, \ - { LOOKUP_OPEN, "OPEN" }, \ - { LOOKUP_CREATE, "CREATE" }, \ - { LOOKUP_EXCL, "EXCL" }, \ - { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ - { LOOKUP_EMPTY, "EMPTY" }, \ - { LOOKUP_DOWN, "DOWN" }) - DECLARE_EVENT_CLASS(nfs_lookup_event, TP_PROTO( const struct inode *dir, @@ -324,7 +249,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, - show_lookup_flags(__entry->flags), + show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -368,9 +293,9 @@ 
DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, - show_lookup_flags(__entry->flags), + show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -392,46 +317,6 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); -TRACE_DEFINE_ENUM(O_WRONLY); -TRACE_DEFINE_ENUM(O_RDWR); -TRACE_DEFINE_ENUM(O_CREAT); -TRACE_DEFINE_ENUM(O_EXCL); -TRACE_DEFINE_ENUM(O_NOCTTY); -TRACE_DEFINE_ENUM(O_TRUNC); -TRACE_DEFINE_ENUM(O_APPEND); -TRACE_DEFINE_ENUM(O_NONBLOCK); -TRACE_DEFINE_ENUM(O_DSYNC); -TRACE_DEFINE_ENUM(O_DIRECT); -TRACE_DEFINE_ENUM(O_LARGEFILE); -TRACE_DEFINE_ENUM(O_DIRECTORY); -TRACE_DEFINE_ENUM(O_NOFOLLOW); -TRACE_DEFINE_ENUM(O_NOATIME); -TRACE_DEFINE_ENUM(O_CLOEXEC); - -#define show_open_flags(flags) \ - __print_flags(flags, "|", \ - { O_WRONLY, "O_WRONLY" }, \ - { O_RDWR, "O_RDWR" }, \ - { O_CREAT, "O_CREAT" }, \ - { O_EXCL, "O_EXCL" }, \ - { O_NOCTTY, "O_NOCTTY" }, \ - { O_TRUNC, "O_TRUNC" }, \ - { O_APPEND, "O_APPEND" }, \ - { O_NONBLOCK, "O_NONBLOCK" }, \ - { O_DSYNC, "O_DSYNC" }, \ - { O_DIRECT, "O_DIRECT" }, \ - { O_LARGEFILE, "O_LARGEFILE" }, \ - { O_DIRECTORY, "O_DIRECTORY" }, \ - { O_NOFOLLOW, "O_NOFOLLOW" }, \ - { O_NOATIME, "O_NOATIME" }, \ - { O_CLOEXEC, "O_CLOEXEC" }) - -#define show_fmode_flags(mode) \ - __print_flags(mode, "|", \ - { ((__force unsigned long)FMODE_READ), "READ" }, \ - { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ - { ((__force unsigned long)FMODE_EXEC), "EXEC" }) - TRACE_EVENT(nfs_atomic_open_enter, TP_PROTO( const struct inode *dir, @@ -443,7 +328,7 @@ TRACE_EVENT(nfs_atomic_open_enter, TP_STRUCT__entry( __field(unsigned long, flags) - __field(unsigned int, fmode) + 
__field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) @@ -453,15 +338,15 @@ TRACE_EVENT(nfs_atomic_open_enter, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), TP_printk( "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -481,7 +366,7 @@ TRACE_EVENT(nfs_atomic_open_exit, TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) - __field(unsigned int, fmode) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) @@ -492,17 +377,17 @@ TRACE_EVENT(nfs_atomic_open_exit, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) fmode=%s " "name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -535,7 +420,7 @@ TRACE_EVENT(nfs_create_enter, TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, - show_open_flags(__entry->flags), + show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), 
(unsigned long long)__entry->dir, __get_str(name) @@ -570,9 +455,9 @@ TRACE_EVENT(nfs_create_exit, TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), + show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -640,7 +525,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -730,7 +615,7 @@ TRACE_EVENT(nfs_link_exit, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -817,7 +702,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_printk( "error=%ld (%s) old_name=%02x:%02x:%llu/%s " "new_name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), @@ -871,7 +756,7 @@ TRACE_EVENT(nfs_sillyrename_unlink, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -1054,16 +939,6 @@ TRACE_EVENT(nfs_pgio_error, ) ); -TRACE_DEFINE_ENUM(NFS_UNSTABLE); -TRACE_DEFINE_ENUM(NFS_DATA_SYNC); -TRACE_DEFINE_ENUM(NFS_FILE_SYNC); - -#define nfs_show_stable(stable) \ - __print_symbolic(stable, \ - { NFS_UNSTABLE, 
"UNSTABLE" }, \ - { NFS_DATA_SYNC, "DATA_SYNC" }, \ - { NFS_FILE_SYNC, "FILE_SYNC" }) - TRACE_EVENT(nfs_initiate_write, TP_PROTO( const struct nfs_pgio_header *hdr @@ -1077,7 +952,7 @@ TRACE_EVENT(nfs_initiate_write, __field(u64, fileid) __field(loff_t, offset) __field(u32, count) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) ), TP_fast_assign( @@ -1101,7 +976,7 @@ TRACE_EVENT(nfs_initiate_write, (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count, - nfs_show_stable(__entry->stable) + show_nfs_stable_how(__entry->stable) ) ); @@ -1121,7 +996,7 @@ TRACE_EVENT(nfs_writeback_done, __field(u32, arg_count) __field(u32, res_count) __field(int, status) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1154,8 +1029,8 @@ TRACE_EVENT(nfs_writeback_done, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->status, - nfs_show_stable(__entry->stable), - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + show_nfs_stable_how(__entry->stable), + show_nfs4_verifier(__entry->verifier) ) ); @@ -1256,7 +1131,7 @@ TRACE_EVENT(nfs_commit_done, __field(u64, fileid) __field(loff_t, offset) __field(int, status) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1285,8 +1160,8 @@ TRACE_EVENT(nfs_commit_done, (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->status, - nfs_show_stable(__entry->stable), - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + show_nfs_stable_how(__entry->stable), + show_nfs4_verifier(__entry->verifier) ) ); @@ -1323,76 +1198,6 @@ TRACE_EVENT(nfs_fh_to_dentry, ) ); -TRACE_DEFINE_ENUM(NFS_OK); -TRACE_DEFINE_ENUM(NFSERR_PERM); -TRACE_DEFINE_ENUM(NFSERR_NOENT); -TRACE_DEFINE_ENUM(NFSERR_IO); -TRACE_DEFINE_ENUM(NFSERR_NXIO); -TRACE_DEFINE_ENUM(ECHILD); 
-TRACE_DEFINE_ENUM(NFSERR_EAGAIN); -TRACE_DEFINE_ENUM(NFSERR_ACCES); -TRACE_DEFINE_ENUM(NFSERR_EXIST); -TRACE_DEFINE_ENUM(NFSERR_XDEV); -TRACE_DEFINE_ENUM(NFSERR_NODEV); -TRACE_DEFINE_ENUM(NFSERR_NOTDIR); -TRACE_DEFINE_ENUM(NFSERR_ISDIR); -TRACE_DEFINE_ENUM(NFSERR_INVAL); -TRACE_DEFINE_ENUM(NFSERR_FBIG); -TRACE_DEFINE_ENUM(NFSERR_NOSPC); -TRACE_DEFINE_ENUM(NFSERR_ROFS); -TRACE_DEFINE_ENUM(NFSERR_MLINK); -TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); -TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); -TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); -TRACE_DEFINE_ENUM(NFSERR_DQUOT); -TRACE_DEFINE_ENUM(NFSERR_STALE); -TRACE_DEFINE_ENUM(NFSERR_REMOTE); -TRACE_DEFINE_ENUM(NFSERR_WFLUSH); -TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); -TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); -TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); -TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); -TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); -TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); -TRACE_DEFINE_ENUM(NFSERR_BADTYPE); -TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); - -#define nfs_show_status(x) \ - __print_symbolic(x, \ - { NFS_OK, "OK" }, \ - { NFSERR_PERM, "PERM" }, \ - { NFSERR_NOENT, "NOENT" }, \ - { NFSERR_IO, "IO" }, \ - { NFSERR_NXIO, "NXIO" }, \ - { ECHILD, "CHILD" }, \ - { NFSERR_EAGAIN, "AGAIN" }, \ - { NFSERR_ACCES, "ACCES" }, \ - { NFSERR_EXIST, "EXIST" }, \ - { NFSERR_XDEV, "XDEV" }, \ - { NFSERR_NODEV, "NODEV" }, \ - { NFSERR_NOTDIR, "NOTDIR" }, \ - { NFSERR_ISDIR, "ISDIR" }, \ - { NFSERR_INVAL, "INVAL" }, \ - { NFSERR_FBIG, "FBIG" }, \ - { NFSERR_NOSPC, "NOSPC" }, \ - { NFSERR_ROFS, "ROFS" }, \ - { NFSERR_MLINK, "MLINK" }, \ - { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ - { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ - { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ - { NFSERR_DQUOT, "DQUOT" }, \ - { NFSERR_STALE, "STALE" }, \ - { NFSERR_REMOTE, "REMOTE" }, \ - { NFSERR_WFLUSH, "WFLUSH" }, \ - { NFSERR_BADHANDLE, "BADHANDLE" }, \ - { NFSERR_NOT_SYNC, "NOTSYNC" }, \ - { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ - { NFSERR_NOTSUPP, "NOTSUPP" }, \ - { NFSERR_TOOSMALL, "TOOSMALL" }, \ - { 
NFSERR_SERVERFAULT, "REMOTEIO" }, \ - { NFSERR_BADTYPE, "BADTYPE" }, \ - { NFSERR_JUKEBOX, "JUKEBOX" }) - DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, @@ -1427,12 +1232,12 @@ DECLARE_EVENT_CLASS(nfs_xdr_event, __assign_str(procedure, task->tk_msg.rpc_proc->p_name); ), - TP_printk( - "task:%u@%d xid=0x%08x %sv%d %s error=%ld (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x %sv%d %s error=%ld (%s)", __entry->task_id, __entry->client_id, __entry->xid, __get_str(program), __entry->version, __get_str(procedure), -__entry->error, - nfs_show_status(__entry->error) + show_nfs_status(__entry->error) ) ); #define DEFINE_NFS_XDR_EVENT(name) \ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 3307361c79..f331f06769 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -82,10 +82,6 @@ enum pnfs_try_status { PNFS_TRY_AGAIN = 2, }; -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 -#define NFS4ERR_RESET_TO_PNFS 12002 - #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4231d51fc1..3d06bad276 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1644,7 +1644,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo) !atomic_read(&cinfo->rpcs_out)); } -static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) { atomic_inc(&cinfo->rpcs_out); } diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 6e9ea4ee0f..7f071519fb 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -8,6 +8,7 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL + select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER help Choose Y here if you want to allow other computers to access @@ -26,28 +27,29 @@ config NFSD Below you can choose which versions of the NFS protocol are available to clients mounting the NFS server on this system. 
- Support for NFS version 2 (RFC 1094) is always available when + Support for NFS version 3 (RFC 1813) is always available when CONFIG_NFSD is selected. If unsure, say N. -config NFSD_V2_ACL - bool - depends on NFSD - -config NFSD_V3 - bool "NFS server support for NFS version 3" +config NFSD_V2 + bool "NFS server support for NFS version 2 (DEPRECATED)" depends on NFSD + default n help - This option enables support in your system's NFS server for - version 3 of the NFS protocol (RFC 1813). + NFSv2 (RFC 1094) was the first publicly-released version of NFS. + Unless you are hosting ancient (1990's era) NFS clients, you don't + need this. - If unsure, say Y. + If unsure, say N. + +config NFSD_V2_ACL + bool "NFS server support for the NFSv2 ACL protocol extension" + depends on NFSD_V2 config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" - depends on NFSD_V3 - select NFSD_V2_ACL + depends on NFSD help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that never became an official part of the NFS version 3 protocol. 
@@ -70,7 +72,6 @@ config NFSD_V3_ACL config NFSD_V4 bool "NFS server support for NFS version 4" depends on NFSD && PROC_FS - select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS select CRYPTO diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 3f0983e93a..6fffc8f03f 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -10,11 +10,11 @@ obj-$(CONFIG_NFSD) += nfsd.o # this one should be compiled first, as the tracing macros can easily blow up nfsd-y += trace.o -nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ - export.o auth.o lockd.o nfscache.o nfsxdr.o \ - stats.o filecache.o +nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o \ + stats.o filecache.o nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o -nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfs4recover.o diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index ba14d2f4b6..4b7324458a 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -38,6 +38,8 @@ struct nfs4_acl; struct svc_fh; struct svc_rqst; +struct nfsd_attrs; +enum nfs_ftype4; int nfs4_acl_bytes(int entries); int nfs4_acl_get_whotype(char *, u32); @@ -45,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl); -__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl); +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr); #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index c99dee99a3..0ddd20cb68 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -16,6 +16,7 @@ #include "blocklayoutxdr.h" #include "pnfs.h" #include "filecache.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff 
--git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 2455dc8be1..1ed2f691eb 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -9,6 +9,7 @@ #include "nfsd.h" #include "blocklayoutxdr.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 65c331f75e..f21259ead6 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -84,6 +84,6 @@ int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); -int nfsd_reply_cache_stats_open(struct inode *, struct file *); +int nfsd_reply_cache_stats_show(struct seq_file *m, void *v); #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index ee0e3aba4a..d03f7f6a86 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); int exp_rootfh(struct net *, struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -__be32 nfserrno(int errno); static inline void exp_put(struct svc_export *exp) { diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1e8c31ed6c..615ea83249 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1,7 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Open file cache. + * The NFSD open file cache. * * (c) 2015 - Jeff Layton + * + * An nfsd_file object is a per-file collection of open state that binds + * together: + * - a struct file * + * - a user credential + * - a network namespace + * - a read-ahead context + * - monitoring for writeback errors + * + * nfsd_file objects are reference-counted. Consumers acquire a new + * object via the nfsd_file_acquire API. They manage their interest in + * the acquired object, and hence the object's reference count, via + * nfsd_file_get and nfsd_file_put. 
There are two varieties of nfsd_file + * object: + * + * * non-garbage-collected: When a consumer wants to precisely control + * the lifetime of a file's open state, it acquires a non-garbage- + * collected nfsd_file. The final nfsd_file_put releases the open + * state immediately. + * + * * garbage-collected: When a consumer does not control the lifetime + * of open state, it acquires a garbage-collected nfsd_file. The + * final nfsd_file_put allows the open state to linger for a period + * during which it may be re-used. */ #include @@ -12,6 +37,7 @@ #include #include #include +#include #include "vfs.h" #include "nfsd.h" @@ -20,63 +46,75 @@ #include "filecache.h" #include "trace.h" -#define NFSDDBG_FACILITY NFSDDBG_FH - -/* FIXME: dynamically size this for the machine somehow? */ -#define NFSD_FILE_HASH_BITS 12 -#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_SHUTDOWN (1) -#define NFSD_FILE_LRU_THRESHOLD (4096UL) -#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) +#define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) -struct nfsd_fcache_bucket { - struct hlist_head nfb_head; - spinlock_t nfb_lock; - unsigned int nfb_count; - unsigned int nfb_maxcount; -}; - static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); +static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); +static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); +static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { - struct list_head list; struct work_struct work; - struct net *net; spinlock_t lock; struct list_head freeme; - struct rcu_head rcu; }; static struct workqueue_struct *nfsd_filecache_wq __read_mostly; static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; -static struct 
nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; -static long nfsd_file_lru_flags; +static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; -static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; -static DEFINE_SPINLOCK(laundrette_lock); -static LIST_HEAD(laundrettes); +static struct rhltable nfsd_file_rhltable + ____cacheline_aligned_in_smp; + +static bool +nfsd_match_cred(const struct cred *c1, const struct cred *c2) +{ + int i; + + if (!uid_eq(c1->fsuid, c2->fsuid)) + return false; + if (!gid_eq(c1->fsgid, c2->fsgid)) + return false; + if (c1->group_info == NULL || c2->group_info == NULL) + return c1->group_info == c2->group_info; + if (c1->group_info->ngroups != c2->group_info->ngroups) + return false; + for (i = 0; i < c1->group_info->ngroups; i++) { + if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) + return false; + } + return true; +} -static void nfsd_file_gc(void); +static const struct rhashtable_params nfsd_file_rhash_params = { + .key_len = sizeof_field(struct nfsd_file, nf_inode), + .key_offset = offsetof(struct nfsd_file, nf_inode), + .head_offset = offsetof(struct nfsd_file, nf_rlist), + + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. 
+ */ + .min_size = 256, + .automatic_shrinking = true, +}; static void nfsd_file_schedule_laundrette(void) { - long count = atomic_long_read(&nfsd_filecache_count); - - if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) - return; - - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, - NFSD_LAUNDRETTE_DELAY); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -115,22 +153,21 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf) +nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) { int err; struct fsnotify_mark *mark; struct nfsd_file_mark *nfm = NULL, *new; - struct inode *inode = nf->nf_inode; do { - mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); + fsnotify_group_lock(nfsd_file_fsnotify_group); mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, nfm_mark)); - mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); + fsnotify_group_unlock(nfsd_file_fsnotify_group); if (nfm) { fsnotify_put_mark(mark); break; @@ -138,8 +175,9 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) /* Avoid soft lockup race with nfsd_file_mark_put() */ fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); fsnotify_put_mark(mark); - } else - mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); + } else { + fsnotify_group_unlock(nfsd_file_fsnotify_group); + } /* allocate a new nfm */ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); @@ -170,238 +208,233 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) } static struct nfsd_file * -nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, - struct net *net) +nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, + bool 
want_gc) { struct nfsd_file *nf; nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); - if (nf) { - INIT_HLIST_NODE(&nf->nf_node); - INIT_LIST_HEAD(&nf->nf_lru); - nf->nf_file = NULL; - nf->nf_cred = get_current_cred(); - nf->nf_net = net; - nf->nf_flags = 0; - nf->nf_inode = inode; - nf->nf_hashval = hashval; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = may & NFSD_FILE_MAY_MASK; - nf->nf_mark = NULL; - trace_nfsd_file_alloc(nf); - } - return nf; -} - -static bool -nfsd_file_free(struct nfsd_file *nf) -{ - bool flush = false; - - trace_nfsd_file_put_final(nf); - if (nf->nf_mark) - nfsd_file_mark_put(nf->nf_mark); - if (nf->nf_file) { - get_file(nf->nf_file); - filp_close(nf->nf_file, NULL); - fput(nf->nf_file); - flush = true; - } - call_rcu(&nf->nf_rcu, nfsd_file_slab_free); - return flush; -} - -static bool -nfsd_file_check_writeback(struct nfsd_file *nf) -{ - struct file *file = nf->nf_file; - struct address_space *mapping; + if (unlikely(!nf)) + return NULL; - if (!file || !(file->f_mode & FMODE_WRITE)) - return false; - mapping = file->f_mapping; - return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); + INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_birthtime = ktime_get(); + nf->nf_file = NULL; + nf->nf_cred = get_current_cred(); + nf->nf_net = net; + nf->nf_flags = want_gc ? + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); + nf->nf_inode = inode; + refcount_set(&nf->nf_ref, 1); + nf->nf_may = need; + nf->nf_mark = NULL; + return nf; } -static int +/** + * nfsd_file_check_write_error - check for writeback errors on a file + * @nf: nfsd_file to check for writeback errors + * + * Check whether a nfsd_file has an unseen error. Reset the write + * verifier if so. 
+ */ +static void nfsd_file_check_write_error(struct nfsd_file *nf) { struct file *file = nf->nf_file; - if (!file || !(file->f_mode & FMODE_WRITE)) - return 0; - return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); + if ((file->f_mode & FMODE_WRITE) && + filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err))) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } static void -nfsd_file_do_unhash(struct nfsd_file *nf) +nfsd_file_hash_remove(struct nfsd_file *nf) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - trace_nfsd_file_unhash(nf); - - if (nfsd_file_check_write_error(nf)) - nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); - --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; - hlist_del_rcu(&nf->nf_node); - atomic_long_dec(&nfsd_filecache_count); + rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); } static bool nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_do_unhash(nf); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); + nfsd_file_hash_remove(nf); return true; } return false; } -/* - * Return true if the file was unhashed. 
- */ -static bool -nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) +static void +nfsd_file_free(struct nfsd_file *nf) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); - trace_nfsd_file_unhash_and_release_locked(nf); - if (!nfsd_file_unhash(nf)) - return false; - /* keep final reference for nfsd_file_lru_dispose */ - if (refcount_dec_not_one(&nf->nf_ref)) - return true; - - list_add(&nf->nf_lru, dispose); - return true; -} + trace_nfsd_file_free(nf); -static void -nfsd_file_put_noref(struct nfsd_file *nf) -{ - trace_nfsd_file_put(nf); + this_cpu_inc(nfsd_file_releases); + this_cpu_add(nfsd_file_total_age, age); - if (refcount_dec_and_test(&nf->nf_ref)) { - WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); - nfsd_file_free(nf); + nfsd_file_unhash(nf); + if (nf->nf_mark) + nfsd_file_mark_put(nf->nf_mark); + if (nf->nf_file) { + nfsd_file_check_write_error(nf); + filp_close(nf->nf_file, NULL); } -} -void -nfsd_file_put(struct nfsd_file *nf) -{ - bool is_hashed; - - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { - nfsd_file_put_noref(nf); + /* + * If this item is still linked via nf_lru, that's a bug. + * WARN and leak it to preserve system stability. 
+ */ + if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) return; - } - filemap_flush(nf->nf_file->f_mapping); - is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - nfsd_file_put_noref(nf); - if (is_hashed) - nfsd_file_schedule_laundrette(); - if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) - nfsd_file_gc(); + call_rcu(&nf->nf_rcu, nfsd_file_slab_free); } -struct nfsd_file * -nfsd_file_get(struct nfsd_file *nf) +static bool +nfsd_file_check_writeback(struct nfsd_file *nf) { - if (likely(refcount_inc_not_zero(&nf->nf_ref))) - return nf; - return NULL; + struct file *file = nf->nf_file; + struct address_space *mapping; + + /* File not open for write? */ + if (!(file->f_mode & FMODE_WRITE)) + return false; + + /* + * Some filesystems (e.g. NFS) flush all dirty data on close. + * On others, there is no need to wait for writeback. + */ + if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) + return false; + + mapping = file->f_mapping; + return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static void -nfsd_file_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); - nfsd_file_put_noref(nf); +static bool nfsd_file_lru_add(struct nfsd_file *nf) +{ + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { + trace_nfsd_file_lru_add(nf); + return true; } + return false; } -static void -nfsd_file_dispose_list_sync(struct list_head *dispose) +static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - bool flush = false; - struct nfsd_file *nf; - - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); - if (!refcount_dec_and_test(&nf->nf_ref)) - continue; - if (nfsd_file_free(nf)) - flush = true; + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { 
+ trace_nfsd_file_lru_del(nf); + return true; } - if (flush) - flush_delayed_fput(); + return false; } -static void -nfsd_file_list_remove_disposal(struct list_head *dst, - struct nfsd_fcache_disposal *l) +struct nfsd_file * +nfsd_file_get(struct nfsd_file *nf) { - spin_lock(&l->lock); - list_splice_init(&l->freeme, dst); - spin_unlock(&l->lock); + if (nf && refcount_inc_not_zero(&nf->nf_ref)) + return nf; + return NULL; } -static void -nfsd_file_list_add_disposal(struct list_head *files, struct net *net) +/** + * nfsd_file_put - put the reference to a nfsd_file + * @nf: nfsd_file of which to put the reference + * + * Put a reference to a nfsd_file. In the non-GC case, we just put the + * reference immediately. In the GC case, if the reference would be + * the last one, the put it on the LRU instead to be cleaned up later. + */ +void +nfsd_file_put(struct nfsd_file *nf) { - struct nfsd_fcache_disposal *l; + might_sleep(); + trace_nfsd_file_put(nf); - rcu_read_lock(); - list_for_each_entry_rcu(l, &laundrettes, list) { - if (l->net == net) { - spin_lock(&l->lock); - list_splice_tail_init(files, &l->freeme); - spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); - break; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && + test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + /* + * If this is the last reference (nf_ref == 1), then try to + * transfer it to the LRU. + */ + if (refcount_dec_not_one(&nf->nf_ref)) + return; + + /* Try to add it to the LRU. If that fails, decrement. */ + if (nfsd_file_lru_add(nf)) { + /* If it's still hashed, we're done */ + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_schedule_laundrette(); + return; + } + + /* + * We're racing with unhashing, so try to remove it from + * the LRU. If removal fails, then someone else already + * has our reference. 
+ */ + if (!nfsd_file_lru_remove(nf)) + return; } } - rcu_read_unlock(); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); } static void -nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, - struct net *net) +nfsd_file_dispose_list(struct list_head *dispose) { - struct nfsd_file *nf, *tmp; + struct nfsd_file *nf; - list_for_each_entry_safe(nf, tmp, src, nf_lru) { - if (nf->nf_net == net) - list_move_tail(&nf->nf_lru, dst); + while (!list_empty(dispose)) { + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); } } +/** + * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list + * @dispose: list of nfsd_files to be disposed + * + * Transfers each file to the "freeme" list for its nfsd_net, to eventually + * be disposed of by the per-net garbage collector. + */ static void nfsd_file_dispose_list_delayed(struct list_head *dispose) { - LIST_HEAD(list); - struct nfsd_file *nf; - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); - nfsd_file_list_add_disposal(&list, nf->nf_net); + struct nfsd_file *nf = list_first_entry(dispose, + struct nfsd_file, nf_lru); + struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; + + spin_lock(&l->lock); + list_move_tail(&nf->nf_lru, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); } } -/* - * Note this can deadlock with nfsd_file_cache_purge. 
+/** + * nfsd_file_lru_cb - Examine an entry on the LRU list + * @item: LRU entry to examine + * @lru: controlling LRU + * @lock: LRU list lock (unused) + * @arg: dispose list + * + * Return values: + * %LRU_REMOVED: @item was removed from the LRU + * %LRU_ROTATE: @item is to be moved to the LRU tail + * %LRU_SKIP: @item cannot be evicted */ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, @@ -412,72 +445,60 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* - * Do a lockless refcount check. The hashtable holds one reference, so - * we look to see if anything else has a reference, or if any have - * been put since the shrinker last ran. Those don't get unhashed and - * released. - * - * Note that in the put path, we set the flag and then decrement the - * counter. Here we check the counter and then test and clear the flag. - * That order is deliberate to ensure that we can do this locklessly. - */ - if (refcount_read(&nf->nf_ref) > 1) - goto out_skip; + /* We should only be dealing with GC entries here */ + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); /* * Don't throw out files that are still undergoing I/O or * that have uncleared errors pending. */ - if (nfsd_file_check_writeback(nf)) - goto out_skip; + if (nfsd_file_check_writeback(nf)) { + trace_nfsd_file_gc_writeback(nf); + return LRU_SKIP; + } - if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_skip; + /* If it was recently added to the list, skip it */ + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { + trace_nfsd_file_gc_referenced(nf); + return LRU_ROTATE; + } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - goto out_skip; + /* + * Put the reference held on behalf of the LRU. If it wasn't the last + * one, then just remove it from the LRU and ignore it. 
+ */ + if (!refcount_dec_and_test(&nf->nf_ref)) { + trace_nfsd_file_gc_in_use(nf); + list_lru_isolate(lru, &nf->nf_lru); + return LRU_REMOVED; + } + /* Refcount went to zero. Unhash it and queue it to the dispose list */ + nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); + this_cpu_inc(nfsd_file_evictions); + trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; -out_skip: - return LRU_SKIP; -} - -static unsigned long -nfsd_file_lru_walk_list(struct shrink_control *sc) -{ - LIST_HEAD(head); - struct nfsd_file *nf; - unsigned long ret; - - if (sc) - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, - nfsd_file_lru_cb, &head); - else - ret = list_lru_walk(&nfsd_file_lru, - nfsd_file_lru_cb, - &head, LONG_MAX); - list_for_each_entry(nf, &head, nf_lru) { - spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_do_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - } - nfsd_file_dispose_list_delayed(&head); - return ret; } static void nfsd_file_gc(void) { - nfsd_file_lru_walk_list(NULL); + LIST_HEAD(dispose); + unsigned long ret; + + ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, + &dispose, list_lru_count(&nfsd_file_lru)); + trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); + nfsd_file_dispose_list_delayed(&dispose); } static void nfsd_file_gc_worker(struct work_struct *work) { nfsd_file_gc(); - nfsd_file_schedule_laundrette(); + if (list_lru_count(&nfsd_file_lru)) + nfsd_file_schedule_laundrette(); } static unsigned long @@ -489,7 +510,14 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - return nfsd_file_lru_walk_list(sc); + LIST_HEAD(dispose); + unsigned long ret; + + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &dispose); + trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); + nfsd_file_dispose_list_delayed(&dispose); + return ret; } static 
struct shrinker nfsd_file_shrinker = { @@ -498,70 +526,123 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; +/** + * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file + * @nf: nfsd_file to attempt to queue + * @dispose: private list to queue successfully-put objects + * + * Unhash an nfsd_file, try to get a reference to it, and then put that + * reference. If it's the last reference, queue it to the dispose list. + */ static void -__nfsd_file_close_inode(struct inode *inode, unsigned int hashval, - struct list_head *dispose) +nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) + __must_hold(RCU) { - struct nfsd_file *nf; - struct hlist_node *tmp; + int decrement = 1; - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { - if (inode == nf->nf_inode) - nfsd_file_unhash_and_release_locked(nf, dispose); + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + return; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + return; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); } - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); } /** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file - * @inode: inode of the file to attempt to remove + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * + * An nfsd_file represents a struct file being held open on behalf of nfsd. + * An open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. 
spurious silly-renames when reexporting NFS). + * + * This function is intended to find open nfsd_files when this sort of + * conflicting access occurs and then attempt to close those files out. * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. Also ensure that any of the - * fputs also have their final __fput done as well. + * Populates the dispose list with entries that have already had their + * refcounts go to zero. The actual free of an nfsd_file can be expensive, + * so we leave it up to the caller whether it wants to wait or not. */ -void -nfsd_file_close_inode_sync(struct inode *inode) +static void +nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); - LIST_HEAD(dispose); + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); - nfsd_file_dispose_list_sync(&dispose); + rcu_read_lock(); + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) + continue; + nfsd_file_cond_queue(nf, dispose); + } + rcu_read_unlock(); } /** * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. + * Close out any open nfsd_files that can be reaped for @inode. The + * actual freeing is deferred to the dispose_list_delayed infrastructure. 
+ * + * This is used by the fsnotify callbacks and setlease notifier. */ static void nfsd_file_close_inode(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); + nfsd_file_queue_for_close(inode, &dispose); nfsd_file_dispose_list_delayed(&dispose); } +/** + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * @inode: inode of the file to attempt to remove + * + * Close out any open nfsd_files that can be reaped for @inode. The + * nfsd_files are closed out synchronously. + * + * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames + * when reexporting NFS. + */ +void +nfsd_file_close_inode_sync(struct inode *inode) +{ + struct nfsd_file *nf; + LIST_HEAD(dispose); + + trace_nfsd_file_close(inode); + + nfsd_file_queue_for_close(inode, &dispose); + while (!list_empty(&dispose)) { + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); + } + flush_delayed_fput(); +} + /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and close any entries that have not been used since - * the last scan. - * - * Note this can deadlock with nfsd_file_cache_purge. + * Scrape the freeme list for this nfsd_net, and then dispose of them + * all. 
*/ static void nfsd_file_delayed_close(struct work_struct *work) @@ -570,7 +651,10 @@ nfsd_file_delayed_close(struct work_struct *work) struct nfsd_fcache_disposal *l = container_of(work, struct nfsd_fcache_disposal, work); - nfsd_file_list_remove_disposal(&head, l); + spin_lock(&l->lock); + list_splice_init(&l->freeme, &head); + spin_unlock(&l->lock); + nfsd_file_dispose_list(&head); } @@ -582,7 +666,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode_sync(file_inode(fl->fl_file)); + nfsd_file_close_inode(file_inode(fl->fl_file)); return 0; } @@ -595,6 +679,9 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { + if (WARN_ON_ONCE(!inode)) + return 0; + trace_nfsd_file_fsnotify_handle_event(inode, mask); /* Should be no marks on non-regular files */ @@ -622,25 +709,21 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = { int nfsd_file_cache_init(void) { - int ret = -ENOMEM; - unsigned int i; + int ret; - clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); - - if (nfsd_file_hashtbl) + lockdep_assert_held(&nfsd_mutex); + if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; + ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); + if (ret) + return ret; + + ret = -ENOMEM; nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); if (!nfsd_filecache_wq) goto out; - nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, - sizeof(*nfsd_file_hashtbl), GFP_KERNEL); - if (!nfsd_file_hashtbl) { - pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); - goto out_err; - } - nfsd_file_slab = kmem_cache_create("nfsd_file", sizeof(struct nfsd_file), 0, 0, NULL); if (!nfsd_file_slab) { @@ -674,7 +757,8 @@ nfsd_file_cache_init(void) goto out_shrinker; } - nfsd_file_fsnotify_group = 
fsnotify_alloc_group(&nfsd_file_fsnotify_ops); + nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, + FSNOTIFY_GROUP_NOFS); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); @@ -683,11 +767,6 @@ nfsd_file_cache_init(void) goto out_notifier; } - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); - spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); - } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; @@ -702,50 +781,47 @@ nfsd_file_cache_init(void) nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; + rhltable_destroy(&nfsd_file_rhltable); goto out; } -/* - * Note this can deadlock with nfsd_file_lru_cb. +/** + * __nfsd_file_cache_purge: clean out the cache for shutdown + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, + * then close out everything. Called when an nfsd instance is being shut down, + * and when the exports table is flushed. 
*/ -void -nfsd_file_cache_purge(struct net *net) +static void +__nfsd_file_cache_purge(struct net *net) { - unsigned int i; - struct nfsd_file *nf; - struct hlist_node *next; + struct rhashtable_iter iter; + struct nfsd_file *nf; LIST_HEAD(dispose); - bool del; - if (!nfsd_file_hashtbl) - return; + rhltable_walk_enter(&nfsd_file_rhltable, &iter); + do { + rhashtable_walk_start(&iter); - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; + nf = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(nf)) { + if (!net || nf->nf_net == net) + nfsd_file_cond_queue(nf, &dispose); + nf = rhashtable_walk_next(&iter); + } - spin_lock(&nfb->nfb_lock); - hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { - if (net && nf->nf_net != net) - continue; - del = nfsd_file_unhash_and_release_locked(nf, &dispose); + rhashtable_walk_stop(&iter); + } while (nf == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); - /* - * Deadlock detected! Something marked this entry as - * unhased, but hasn't removed it from the hash list. 
- */ - WARN_ON_ONCE(!del); - } - spin_unlock(&nfb->nfb_lock); - nfsd_file_dispose_list(&dispose); - } + nfsd_file_dispose_list(&dispose); } static struct nfsd_fcache_disposal * -nfsd_alloc_fcache_disposal(struct net *net) +nfsd_alloc_fcache_disposal(void) { struct nfsd_fcache_disposal *l; @@ -753,7 +829,6 @@ nfsd_alloc_fcache_disposal(struct net *net) if (!l) return NULL; INIT_WORK(&l->work, nfsd_file_delayed_close); - l->net = net; spin_lock_init(&l->lock); INIT_LIST_HEAD(&l->freeme); return l; @@ -762,61 +837,40 @@ nfsd_alloc_fcache_disposal(struct net *net) static void nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) { - rcu_assign_pointer(l->net, NULL); cancel_work_sync(&l->work); nfsd_file_dispose_list(&l->freeme); - kfree_rcu(l, rcu); -} - -static void -nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) -{ - spin_lock(&laundrette_lock); - list_add_tail_rcu(&l->list, &laundrettes); - spin_unlock(&laundrette_lock); + kfree(l); } static void -nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) -{ - spin_lock(&laundrette_lock); - list_del_rcu(&l->list); - spin_unlock(&laundrette_lock); -} - -static int -nfsd_alloc_fcache_disposal_net(struct net *net) +nfsd_free_fcache_disposal_net(struct net *net) { - struct nfsd_fcache_disposal *l; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; - l = nfsd_alloc_fcache_disposal(net); - if (!l) - return -ENOMEM; - nfsd_add_fcache_disposal(l); - return 0; + nfsd_free_fcache_disposal(l); } -static void -nfsd_free_fcache_disposal_net(struct net *net) +int +nfsd_file_cache_start_net(struct net *net) { - struct nfsd_fcache_disposal *l; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - rcu_read_lock(); - list_for_each_entry_rcu(l, &laundrettes, list) { - if (l->net != net) - continue; - nfsd_del_fcache_disposal(l); - rcu_read_unlock(); - nfsd_free_fcache_disposal(l); - return; - } - rcu_read_unlock(); + nn->fcache_disposal = 
nfsd_alloc_fcache_disposal(); + return nn->fcache_disposal ? 0 : -ENOMEM; } -int -nfsd_file_cache_start_net(struct net *net) +/** + * nfsd_file_cache_purge - Remove all cache items associated with @net + * @net: target net namespace + * + */ +void +nfsd_file_cache_purge(struct net *net) { - return nfsd_alloc_fcache_disposal_net(net); + lockdep_assert_held(&nfsd_mutex); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) + __nfsd_file_cache_purge(net); } void @@ -829,7 +883,11 @@ nfsd_file_cache_shutdown_net(struct net *net) void nfsd_file_cache_shutdown(void) { - set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); + int i; + + lockdep_assert_held(&nfsd_mutex); + if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) + return; lease_unregister_notifier(&nfsd_file_lease_notifier); unregister_shrinker(&nfsd_file_shrinker); @@ -838,7 +896,7 @@ nfsd_file_cache_shutdown(void) * calling nfsd_file_cache_purge */ cancel_delayed_work_sync(&nfsd_filecache_laundrette); - nfsd_file_cache_purge(NULL); + __nfsd_file_cache_purge(NULL); list_lru_destroy(&nfsd_file_lru); rcu_barrier(); fsnotify_put_group(nfsd_file_fsnotify_group); @@ -848,226 +906,332 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; -} - -static bool -nfsd_match_cred(const struct cred *c1, const struct cred *c2) -{ - int i; - - if (!uid_eq(c1->fsuid, c2->fsuid)) - return false; - if (!gid_eq(c1->fsgid, c2->fsgid)) - return false; - if (c1->group_info == NULL || c2->group_info == NULL) - return c1->group_info == c2->group_info; - if (c1->group_info->ngroups != c2->group_info->ngroups) - return false; - for (i = 0; i < c1->group_info->ngroups; i++) { - if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) - return false; + rhltable_destroy(&nfsd_file_rhltable); + + for_each_possible_cpu(i) 
{ + per_cpu(nfsd_file_cache_hits, i) = 0; + per_cpu(nfsd_file_acquisitions, i) = 0; + per_cpu(nfsd_file_releases, i) = 0; + per_cpu(nfsd_file_total_age, i) = 0; + per_cpu(nfsd_file_evictions, i) = 0; } - return true; } static struct nfsd_file * -nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, - unsigned int hashval, struct net *net) +nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, + struct inode *inode, unsigned char need, + bool want_gc) { + struct rhlist_head *tmp, *list; struct nfsd_file *nf; - unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { if (nf->nf_may != need) continue; - if (nf->nf_inode != inode) - continue; if (nf->nf_net != net) continue; - if (!nfsd_match_cred(nf->nf_cred, current_cred())) + if (!nfsd_match_cred(nf->nf_cred, cred)) continue; - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) continue; - if (nfsd_file_get(nf) != NULL) - return nf; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + continue; + + if (!nfsd_file_get(nf)) + continue; + return nf; } return NULL; } /** - * nfsd_file_is_cached - are there any cached open files for this fh? - * @inode: inode of the file to check + * nfsd_file_is_cached - are there any cached open files for this inode? + * @inode: inode to check + * + * The lookup matches inodes in all net namespaces and is atomic wrt + * nfsd_file_acquire(). * - * Scan the hashtable for open files that match this fh. Returns true if there - * are any, and false if not. 
+ * Return values: + * %true: filecache contains at least one file matching this inode + * %false: filecache contains no files matching this inode */ bool nfsd_file_is_cached(struct inode *inode) { - bool ret = false; - struct nfsd_file *nf; - unsigned int hashval; - - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); + struct rhlist_head *tmp, *list; + struct nfsd_file *nf; + bool ret = false; rcu_read_lock(); - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node) { - if (inode == nf->nf_inode) { + list = rhltable_lookup(&nfsd_file_rhltable, &inode, + nfsd_file_rhash_params); + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { ret = true; break; } - } rcu_read_unlock(); - trace_nfsd_file_is_cached(inode, hashval, (int)ret); + + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } -__be32 -nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +static __be32 +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { - __be32 status; + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; struct net *net = SVC_NET(rqstp); - struct nfsd_file *nf, *new; + struct nfsd_file *new, *nf; + const struct cred *cred; + bool open_retry = true; struct inode *inode; - unsigned int hashval; - bool retry = true; + __be32 status; + int ret; - /* FIXME: skip this if fh_dentry is already set? 
*/ status = fh_verify(rqstp, fhp, S_IFREG, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; - inode = d_inode(fhp->fh_dentry); - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); + cred = get_current_cred(); + retry: rcu_read_lock(); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); rcu_read_unlock(); - if (nf) + + if (nf) { + /* + * If the nf is on the LRU then it holds an extra reference + * that must be put if it's removed. It had better not be + * the last one however, since we should hold another. + */ + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; + } - new = nfsd_file_alloc(inode, may_flags, hashval, net); + new = nfsd_file_alloc(net, inode, need, want_gc); if (!new) { - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, - NULL, nfserr_jukebox); - return nfserr_jukebox; + status = nfserr_jukebox; + goto out; } - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - if (nf == NULL) + rcu_read_lock(); + spin_lock(&inode->i_lock); + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + if (unlikely(nf)) { + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + nfsd_file_slab_free(&new->nf_rcu); + goto wait_for_construction; + } + nf = new; + ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, + nfsd_file_rhash_params); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + if (likely(ret == 0)) goto open_file; - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - nfsd_file_slab_free(&new->nf_rcu); + + if (ret == -EEXIST) + goto retry; + trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); + status = nfserr_jukebox; + goto construction_err; wait_for_construction: wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); /* Did construction of this file fail? 
*/ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - if (!retry) { + trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); + if (!open_retry) { status = nfserr_jukebox; - goto out; + goto construction_err; } - retry = false; - nfsd_file_put_noref(nf); + open_retry = false; goto retry; } - this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); -out: - if (status == nfs_ok) { - *pnf = nf; - } else { + if (status != nfs_ok) { nfsd_file_put(nf); nf = NULL; } - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); +out: + if (status == nfs_ok) { + this_cpu_inc(nfsd_file_acquisitions); + nfsd_file_check_write_error(nf); + *pnf = nf; + } + put_cred(cred); + trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); return status; + open_file: - nf = new; - /* Take reference for the hashtable */ - refcount_inc(&nf->nf_ref); - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - list_lru_add(&nfsd_file_lru, &nf->nf_lru); - hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); - ++nfsd_file_hashtbl[hashval].nfb_count; - nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, - nfsd_file_hashtbl[hashval].nfb_count); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) - nfsd_file_gc(); - - nf->nf_mark = nfsd_file_mark_find_or_create(nf); - if (nf->nf_mark) - status = nfsd_open_verified(rqstp, fhp, S_IFREG, - may_flags, &nf->nf_file); - else + trace_nfsd_file_alloc(nf); + nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); + if (nf->nf_mark) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { + status = nfsd_open_verified(rqstp, fhp, may_flags, + &nf->nf_file); + trace_nfsd_file_open(nf, status); + } + } else status = nfserr_jukebox; /* * If construction 
failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || inode->i_nlink == 0) { - bool do_free; - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - do_free = nfsd_file_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (do_free) - nfsd_file_put_noref(nf); - } - clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); - smp_mb__after_atomic(); - wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); + if (status != nfs_ok || inode->i_nlink == 0) + nfsd_file_unhash(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (status == nfs_ok) + goto out; + +construction_err: + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); + nf = NULL; goto out; } +/** + * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * The nfsd_file object returned by this API is reference-counted + * and garbage-collected. The object is retained for a few + * seconds after the final nfsd_file_put() in case the caller + * wants to re-use it. + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); +} + +/** + * nfsd_file_acquire - Get a struct nfsd_file with an open file + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. 
The object is unhashed after the + * final nfsd_file_put(). + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); +} + +/** + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file just created + * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) + * @pnf: OUT: new or found "struct nfsd_file" object + * + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); +} + /* * Note that fields may be added, removed or reordered in the future. Programs * scraping this file for info should test the labels to ensure they're * getting the correct field. 
*/ -static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) +int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned int i, count = 0, longest = 0; - unsigned long hits = 0; + unsigned long releases = 0, evictions = 0; + unsigned long hits = 0, acquisitions = 0; + unsigned int i, count = 0, buckets = 0; + unsigned long lru = 0, total_age = 0; - /* - * No need for spinlocks here since we're not terribly interested in - * accuracy. We do take the nfsd_mutex simply to ensure that we - * don't end up racing with server shutdown - */ + /* Serialize with server shutdown */ mutex_lock(&nfsd_mutex); - if (nfsd_file_hashtbl) { - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - count += nfsd_file_hashtbl[i].nfb_count; - longest = max(longest, nfsd_file_hashtbl[i].nfb_count); - } + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { + struct bucket_table *tbl; + struct rhashtable *ht; + + lru = list_lru_count(&nfsd_file_lru); + + rcu_read_lock(); + ht = &nfsd_file_rhltable.ht; + count = atomic_read(&ht->nelems); + tbl = rht_dereference_rcu(ht->tbl, ht); + buckets = tbl->size; + rcu_read_unlock(); } mutex_unlock(&nfsd_mutex); - for_each_possible_cpu(i) + for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); + acquisitions += per_cpu(nfsd_file_acquisitions, i); + releases += per_cpu(nfsd_file_releases, i); + total_age += per_cpu(nfsd_file_total_age, i); + evictions += per_cpu(nfsd_file_evictions, i); + } - seq_printf(m, "total entries: %u\n", count); - seq_printf(m, "longest chain: %u\n", longest); + seq_printf(m, "total inodes: %u\n", count); + seq_printf(m, "hash buckets: %u\n", buckets); + seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); + seq_printf(m, "acquisitions: %lu\n", acquisitions); + seq_printf(m, "releases: %lu\n", releases); + seq_printf(m, "evictions: %lu\n", evictions); + if (releases) + seq_printf(m, "mean age (ms): %ld\n", total_age / releases); + else + seq_printf(m, "mean age 
(ms): -\n"); return 0; } - -int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, nfsd_file_cache_stats_show, NULL); -} diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 63104be286..e54165a322 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,21 +29,23 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. */ struct nfsd_file { - struct hlist_node nf_node; - struct list_head nf_lru; - struct rcu_head nf_rcu; + struct rhlist_head nf_rlist; + void *nf_inode; struct file *nf_file; const struct cred *nf_cred; struct net *nf_net; #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) +#define NFSD_FILE_GC (3) unsigned long nf_flags; - struct inode *nf_inode; - unsigned int nf_hashval; refcount_t nf_ref; unsigned char nf_may; + struct nfsd_file_mark *nf_mark; + struct list_head nf_lru; + struct rcu_head nf_rcu; + ktime_t nf_birthtime; }; int nfsd_file_cache_init(void); @@ -55,7 +57,12 @@ void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); +__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -int nfsd_file_cache_stats_open(struct inode *, struct file *); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); +int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index db7ef07ae5..fabc21ed68 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -15,6 +15,7 @@ #include "flexfilelayoutxdr.h" #include 
"pnfs.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS @@ -61,7 +62,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, goto out_error; fl->fh.size = fhp->fh_handle.fh_size; - memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size); + memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size); /* Give whole file layout segments */ seg->offset = 0; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 606fa155c2..46a7f9b813 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, /* must initialize before using! but maxsize doesn't matter */ fh_init(&fh,0); fh.fh_handle.fh_size = f->size; - memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); + memcpy(&fh.fh_handle.fh_raw, f->data, f->size); fh.fh_export = NULL; access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 935c1028c2..51a4b7885c 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -11,6 +11,7 @@ #include #include #include +#include /* Hash tables for nfs4_clientid state */ #define CLIENT_HASH_BITS 4 @@ -108,9 +109,8 @@ struct nfsd_net { bool nfsd_net_up; bool lockd_up; - /* Time of server startup */ - struct timespec64 nfssvc_boot; - seqlock_t boot_lock; + seqlock_t writeverf_lock; + unsigned char writeverf[8]; /* * Max number of connections this nfsd container will allow. Defaults @@ -123,12 +123,13 @@ struct nfsd_net { u32 clverifier_counter; struct svc_serv *nfsd_serv; - - wait_queue_head_t ntf_wq; - atomic_t ntf_refcnt; - - /* Allow umount to wait for nfsd state cleanup */ - struct completion nfsd_shutdown_complete; + /* When a listening socket is added to nfsd, keep_active is set + * and this justifies a reference on nfsd_serv. This stops + * nfsd_serv from being freed. When the number of threads is + * set, keep_active is cleared and the reference is dropped. 
So + * when the last thread exits, the service will be destroyed. + */ + int keep_active; /* * clientid and stateid data for construction of net unique COPY @@ -184,6 +185,17 @@ struct nfsd_net { /* utsname taken from the process that starts the server */ char nfsd_name[UNX_MAXNODENAME+1]; + + struct nfsd_fcache_disposal *fcache_disposal; + + siphash_key_t siphash_key; + + atomic_t nfs4_client_count; + int nfs4_max_clients; + + atomic_t nfsd_courtesy_clients; + struct shrinker nfsd_client_shrinker; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ @@ -193,6 +205,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn); extern unsigned int nfsd_net_id; -void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn); -void nfsd_reset_boot_verifier(struct nfsd_net *nn); +void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); +void nfsd_reset_write_verifier(struct nfsd_net *nn); #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 30a1782a03..65d4511b7a 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -111,7 +111,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, argp->acl_access); @@ -122,7 +122,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_drop_lock; - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); @@ -136,7 +136,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) return rpc_success; out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); @@ -188,51 +188,51 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp) * XDR decode functions */ -static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct 
xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_getaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; - return 1; + return true; } -static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; if (argp->mask & ~NFS_ACL_MASK) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? &argp->acl_access : NULL)) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? &argp->acl_default : NULL)) - return 0; + return false; - return 1; + return true; } -static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return 0; + return false; - return 1; + return true; } /* @@ -240,9 +240,9 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) */ /* GETACL */ -static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct inode *inode; @@ -270,9 +270,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst 
*rqstp, __be32 *p) } /* ACCESS */ -static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) @@ -321,6 +321,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, @@ -332,6 +333,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfsaclsvc_encode_getaclres, .pc_release = nfsaclsvc_release_getacl, .pc_argsize = sizeof(struct nfsd3_getaclargs), + .pc_argzero = sizeof(struct nfsd3_getaclargs), .pc_ressize = sizeof(struct nfsd3_getaclres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+2*(1+ACL), @@ -343,6 +345,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd3_setaclargs), + .pc_argzero = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -354,6 +357,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -365,6 +369,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfsaclsvc_encode_accessres, .pc_release = nfsaclsvc_release_access, .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_argzero = sizeof(struct nfsd3_accessargs), 
.pc_ressize = sizeof(struct nfsd3_accessres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT+1, diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 5dfe7644a5..a34a22e272 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -101,7 +101,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, argp->acl_access); @@ -111,7 +111,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) argp->acl_default); out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); @@ -127,38 +127,38 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) * XDR decode functions */ -static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_getaclargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->mask) < 0) - return 0; + return false; - return 1; + return true; } -static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; if (argp->mask & ~NFS_ACL_MASK) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? &argp->acl_access : NULL)) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? 
&argp->acl_default : NULL)) - return 0; + return false; - return 1; + return true; } /* @@ -166,61 +166,43 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) */ /* GETACL */ -static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; - struct kvec *head = rqstp->rq_res.head; struct inode *inode; - unsigned int base; - int n; - int w; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: inode = d_inode(dentry); if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return 0; - - base = (char *)xdr->p - (char *)head->iov_base; - - rqstp->rq_res.page_len = w = nfsacl_size( - (resp->mask & NFS_ACL) ? resp->acl_access : NULL, - (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); - while (w > 0) { - if (!*(rqstp->rq_next_page++)) - return 0; - w -= PAGE_SIZE; - } - - n = nfsacl_encode(&rqstp->rq_res, base, inode, - resp->acl_access, - resp->mask & NFS_ACL, 0); - if (n > 0) - n = nfsacl_encode(&rqstp->rq_res, base + n, inode, - resp->acl_default, - resp->mask & NFS_DFACL, - NFS_ACL_DEFAULT); - if (n <= 0) - return 0; + return false; + + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, + resp->mask & NFS_ACL, 0)) + return false; + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT)) + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* SETACL */ -static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -252,6 +234,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, @@ -263,6 +246,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_encode = nfs3svc_encode_getaclres, .pc_release = nfs3svc_release_getacl, .pc_argsize = sizeof(struct nfsd3_getaclargs), + .pc_argzero = sizeof(struct nfsd3_getaclargs), .pc_ressize = sizeof(struct nfsd3_getaclres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+2*(1+ACL), @@ -274,6 +258,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_encode = nfs3svc_encode_setaclres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_setaclargs), + .pc_argzero = 
sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd3_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index eaf785aec0..d01b29aba6 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -8,10 +8,12 @@ #include #include #include +#include #include "cache.h" #include "xdr3.h" #include "vfs.h" +#include "filecache.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -66,12 +68,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp) { struct nfsd3_sattrargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: SETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, + resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, argp->check_guard, argp->guardtime); return rpc_success; } @@ -220,17 +225,137 @@ nfsd3_proc_write(struct svc_rqst *rqstp) } /* - * With NFSv3, CREATE processing is a lot easier than with NFSv2. - * At least in theory; we'll see how it fares in practice when the - * first reports about SunOS compatibility problems start to pour in... + * Implement NFSv3's unchecked, guarded, and exclusive CREATE + * semantics for regular files. Except for the created file, + * this operation is stateless on the server. + * + * Upon return, caller must release @fhp and @resfhp. 
*/ +static __be32 +nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd3_createargs *argp) +{ + struct iattr *iap = &argp->attrs; + struct dentry *parent, *child; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; + __u32 v_mtime, v_atime; + struct inode *inode; + __be32 status; + int host_err; + + if (isdotent(argp->name, argp->len)) + return nfserr_exist; + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (status != nfs_ok) + return status; + + parent = fhp->fh_dentry; + inode = d_inode(parent); + + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + + inode_lock_nested(inode, I_MUTEX_PARENT); + + child = lookup_one_len(argp->name, parent, argp->len); + if (IS_ERR(child)) { + status = nfserrno(PTR_ERR(child)); + goto out; + } + + if (d_really_is_negative(child)) { + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + if (status != nfs_ok) + goto out; + } + + status = fh_compose(resfhp, fhp->fh_export, child, fhp); + if (status != nfs_ok) + goto out; + + v_mtime = 0; + v_atime = 0; + if (argp->createmode == NFS3_CREATE_EXCLUSIVE) { + u32 *verifier = (u32 *)argp->verf; + + /* + * Solaris 7 gets confused (bugid 4218508) if these have + * the high bit set, as do xfs filesystems without the + * "bigtime" feature. So just clear the high bits. 
+ */ + v_mtime = verifier[0] & 0x7fffffff; + v_atime = verifier[1] & 0x7fffffff; + } + + if (d_really_is_positive(child)) { + status = nfs_ok; + + switch (argp->createmode) { + case NFS3_CREATE_UNCHECKED: + if (!d_is_reg(child)) + break; + iap->ia_valid &= ATTR_SIZE; + goto set_attr; + case NFS3_CREATE_GUARDED: + status = nfserr_exist; + break; + case NFS3_CREATE_EXCLUSIVE: + if (d_inode(child)->i_mtime.tv_sec == v_mtime && + d_inode(child)->i_atime.tv_sec == v_atime && + d_inode(child)->i_size == 0) { + break; + } + status = nfserr_exist; + } + goto out; + } + + if (!IS_POSIXACL(inode)) + iap->ia_mode &= ~current_umask(); + + fh_fill_pre_attrs(fhp); + host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true); + if (host_err < 0) { + status = nfserrno(host_err); + goto out; + } + fh_fill_post_attrs(fhp); + + /* A newly created file already has a file size of zero. */ + if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) + iap->ia_valid &= ~ATTR_SIZE; + if (argp->createmode == NFS3_CREATE_EXCLUSIVE) { + iap->ia_valid = ATTR_MTIME | ATTR_ATIME | + ATTR_MTIME_SET | ATTR_ATIME_SET; + iap->ia_mtime.tv_sec = v_mtime; + iap->ia_atime.tv_sec = v_atime; + iap->ia_mtime.tv_nsec = 0; + iap->ia_atime.tv_nsec = 0; + } + +set_attr: + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); + +out: + inode_unlock(inode); + if (child && !IS_ERR(child)) + dput(child); + fh_drop_write(fhp); + return status; +} + static __be32 nfsd3_proc_create(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - svc_fh *dirfhp, *newfhp = NULL; - struct iattr *attr; + svc_fh *dirfhp, *newfhp; dprintk("nfsd: CREATE(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -239,21 +364,8 @@ nfsd3_proc_create(struct svc_rqst *rqstp) dirfhp = fh_copy(&resp->dirfh, &argp->fh); newfhp = fh_init(&resp->fh, NFS3_FHSIZE); - attr = &argp->attrs; - - /* Unfudge the mode bits */ - attr->ia_mode &= ~S_IFMT; - if (!(attr->ia_valid & ATTR_MODE)) { 
- attr->ia_valid |= ATTR_MODE; - attr->ia_mode = S_IFREG; - } else { - attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG; - } - /* Now create the file and set attributes */ - resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, - attr, newfhp, argp->createmode, - (u32 *)argp->verf, NULL, NULL); + resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp); return rpc_success; } @@ -265,6 +377,9 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -275,8 +390,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); - fh_unlock(&resp->dirfh); + &attrs, S_IFDIR, 0, &resp->fh); return rpc_success; } @@ -285,6 +399,9 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) { struct nfsd3_symlinkargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; if (argp->tlen == 0) { resp->status = nfserr_inval; @@ -311,7 +428,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, - argp->flen, argp->tname, &resp->fh); + argp->flen, argp->tname, &attrs, &resp->fh); kfree(argp->tname); out: return rpc_success; @@ -325,6 +442,9 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) { struct nfsd3_mknodargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; int type; dev_t rdev = 0; @@ -350,8 +470,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) type = nfs3_ftypes[argp->ftype]; resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, 
argp->len, - &argp->attrs, type, rdev, &resp->fh); - fh_unlock(&resp->dirfh); + &attrs, type, rdev, &resp->fh); out: return rpc_success; } @@ -374,7 +493,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); return rpc_success; } @@ -395,7 +513,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); return rpc_success; } @@ -458,15 +575,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, buf->pages = rqstp->rq_next_page; rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; - /* This is xdr_init_encode(), but it assumes that - * the head kvec has already been consumed. */ - xdr_set_scratch_buffer(xdr, NULL, 0); - xdr->buf = buf; - xdr->page_ptr = buf->pages; - xdr->iov = NULL; - xdr->p = page_address(*buf->pages); - xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = NULL; + xdr_init_encode_pages(xdr, buf, buf->pages, NULL); } /* @@ -655,6 +764,7 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) { struct nfsd3_commitargs *argp = rqstp->rq_argp; struct nfsd3_commitres *resp = rqstp->rq_resp; + struct nfsd_file *nf; dprintk("nfsd: COMMIT(3) %s %u@%Lu\n", SVCFH_fmt(&argp->fh), @@ -662,8 +772,14 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) (unsigned long long) argp->offset); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, + resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (resp->status) + goto out; + resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, argp->count, resp->verf); + nfsd_file_put(nf); +out: return rpc_success; } @@ -693,6 +809,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = 
nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, @@ -704,6 +821,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_getattrres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_attrstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -715,6 +833,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_sattrargs), + .pc_argzero = sizeof(struct nfsd3_sattrargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, @@ -726,6 +845,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_lookupres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+pAT+pAT, @@ -737,6 +857,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_accessres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_argzero = sizeof(struct nfsd3_accessargs), .pc_ressize = sizeof(struct nfsd3_accessres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1, @@ -748,6 +869,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readlinkres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4, @@ -759,6 +881,7 
@@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readargs), + .pc_argzero = sizeof(struct nfsd3_readargs), .pc_ressize = sizeof(struct nfsd3_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, @@ -770,6 +893,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_writeres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_writeargs), + .pc_argzero = sizeof(struct nfsd3_writeargs), .pc_ressize = sizeof(struct nfsd3_writeres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+4, @@ -781,6 +905,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_createargs), + .pc_argzero = sizeof(struct nfsd3_createargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -792,6 +917,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mkdirargs), + .pc_argzero = sizeof(struct nfsd3_mkdirargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -803,6 +929,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_symlinkargs), + .pc_argzero = sizeof(struct nfsd3_symlinkargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -814,6 +941,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mknodargs), + 
.pc_argzero = sizeof(struct nfsd3_mknodargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -825,6 +953,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, @@ -836,6 +965,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, @@ -847,6 +977,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_renameres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_renameargs), + .pc_argzero = sizeof(struct nfsd3_renameargs), .pc_ressize = sizeof(struct nfsd3_renameres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+WC, @@ -858,6 +989,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_linkres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_linkargs), + .pc_argzero = sizeof(struct nfsd3_linkargs), .pc_ressize = sizeof(struct nfsd3_linkres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+pAT+WC, @@ -869,6 +1001,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirargs), + .pc_argzero = sizeof(struct nfsd3_readdirargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, .pc_name = "READDIR", @@ -879,6 +1012,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { 
.pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirplusargs), + .pc_argzero = sizeof(struct nfsd3_readdirplusargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, .pc_name = "READDIRPLUS", @@ -888,6 +1022,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsstatres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_fsstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+2*6+1, @@ -898,6 +1033,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsinfores, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_fsinfores), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+12, @@ -908,6 +1044,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_pathconfres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_pathconfres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+6, @@ -919,6 +1056,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_commitres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_commitargs), + .pc_argzero = sizeof(struct nfsd3_commitargs), .pc_ressize = sizeof(struct nfsd3_commitres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+WC+2, diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 48d4f99b7f..3308dd671e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -92,7 +92,7 @@ svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) return false; fh_init(fhp, NFS3_FHSIZE); 
fhp->fh_handle.fh_size = size; - memcpy(&fhp->fh_handle.fh_base, p, size); + memcpy(&fhp->fh_handle.fh_raw, p, size); return true; } @@ -131,7 +131,7 @@ svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) *p++ = cpu_to_be32(size); if (size) p[XDR_QUADLEN(size) - 1] = 0; - memcpy(p, &fhp->fh_handle.fh_base, size); + memcpy(p, &fhp->fh_handle.fh_raw, size); return true; } @@ -487,78 +487,21 @@ svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr, return true; } -/* - * Fill in the pre_op attr for the wcc data - */ -void fill_pre_wcc(struct svc_fh *fhp) -{ - struct inode *inode; - struct kstat stat; - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - __be32 err; - - if (fhp->fh_no_wcc || fhp->fh_pre_saved) - return; - inode = d_inode(fhp->fh_dentry); - err = fh_getattr(fhp, &stat); - if (err) { - /* Grab the times from inode anyway */ - stat.mtime = inode->i_mtime; - stat.ctime = inode->i_ctime; - stat.size = inode->i_size; - } - if (v4) - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); - - fhp->fh_pre_mtime = stat.mtime; - fhp->fh_pre_ctime = stat.ctime; - fhp->fh_pre_size = stat.size; - fhp->fh_pre_saved = true; -} - -/* - * Fill in the post_op attr for the wcc data - */ -void fill_post_wcc(struct svc_fh *fhp) -{ - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode = d_inode(fhp->fh_dentry); - __be32 err; - - if (fhp->fh_no_wcc) - return; - - if (fhp->fh_post_saved) - printk("nfsd: inode locked twice during operation.\n"); - - err = fh_getattr(fhp, &fhp->fh_post_attr); - if (err) { - fhp->fh_post_saved = false; - fhp->fh_post_attr.ctime = inode->i_ctime; - } else - fhp->fh_post_saved = true; - if (v4) - fhp->fh_post_change = - nfsd4_change_attribute(&fhp->fh_post_attr, inode); -} - /* * XDR decode functions */ -int -nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; 
struct nfsd_fhandle *args = rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->fh); } -int -nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_sattrargs *args = rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->fh) && @@ -566,88 +509,81 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) svcxdr_decode_sattrguard3(xdr, args); } -int -nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_diropargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len); } -int -nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_writeargs *args = 
rqstp->rq_argp; u32 max_blocksize = svc_max_payload(rqstp); if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->stable) < 0) - return 0; + return false; /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) - return 0; + return false; /* request sanity */ if (args->count != args->len) - return 0; + return false; if (args->count > max_blocksize) { args->count = max_blocksize; args->len = max_blocksize; } - if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) - return 0; - return 1; + return xdr_stream_subsegment(xdr, &args->payload, args->count); } -int -nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_createargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->createmode) < 0) - return 0; + return false; switch (args->createmode) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: @@ -655,18 +591,17 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) case NFS3_CREATE_EXCLUSIVE: args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE); if (!args->verf) - return 0; + return false; break; default: - return 0; + return false; } - return 1; + return true; } -int -nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_createargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->fh, @@ -674,44 +609,34 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) svcxdr_decode_sattr3(rqstp, 
xdr, &args->attrs); } -int -nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; - size_t remaining; if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen)) - return 0; + return false; if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return 0; - - /* request sanity */ - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->tlen)) - return 0; + return false; - args->first.iov_base = xdr->p; + /* symlink_data */ args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - - return 1; + args->first.iov_base = xdr_inline_decode(xdr, args->tlen); + return args->first.iov_base != NULL; } -int -nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_mknodargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->ftype) < 0) - return 0; + return false; switch (args->ftype) { case NF3CHR: case NF3BLK: @@ -725,16 +650,15 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) /* Valid XDR but illegal file types */ break; default: - return 0; + return false; } - return 1; + return true; } -int -nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_renameargs *args = rqstp->rq_argp; return 
svcxdr_decode_diropargs3(xdr, &args->ffh, @@ -743,10 +667,9 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_linkargs *args = rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->ffh) && @@ -754,62 +677,59 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return 0; + return false; args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); if (!args->verf) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readdirargs *args = rqstp->rq_argp; u32 dircount; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return 0; + return false; args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); if (!args->verf) - return 0; + return false; /* dircount is ignored */ if (xdr_stream_decode_u32(xdr, &dircount) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) +bool 
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_commitargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } /* @@ -817,30 +737,28 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) */ /* GETATTR */ -int -nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime); if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } /* SETATTR, REMOVE, RMDIR */ -int -nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -848,174 +766,168 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) } /* LOOKUP */ -int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 
0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) - return 0; + return false; } - return 1; + return true; } /* ACCESS */ -int -nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* READLINK */ -int -nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->len) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, 0, resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* READ */ -int -nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readres *resp = 
rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->eof) < 0) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* WRITE */ -int -nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_writeres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->committed) < 0) - return 0; + return false; if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* CREATE, MKDIR, SYMLINK, MKNOD */ -int -nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if 
(!svcxdr_encode_post_op_fh3(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return 0; + return false; } - return 1; + return true; } /* RENAME */ -int -nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_renameres *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -1024,10 +936,9 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) } /* LINK */ -int -nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_linkres *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -1036,34 +947,33 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) } /* READDIR */ -int -nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) - return 0; + return false; xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return 0; + return false; break; default: if 
(!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } static __be32 @@ -1140,7 +1050,7 @@ svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, return false; /* cookie */ resp->cookie_offset = dirlist->len; - if (xdr_stream_encode_u64(xdr, NFS_OFFSET_MAX) < 0) + if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0) return false; return true; @@ -1290,27 +1200,26 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, } /* FSSTAT */ -int -nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsstatres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_fsstat3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } static bool @@ -1337,27 +1246,26 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, } /* FSINFO */ -int -nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsinfores *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_fsinfo3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } static bool @@ -1380,51 +1288,49 @@ 
svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, } /* PATHCONF */ -int -nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_pathconfres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_pathconf3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } /* COMMIT */ -int -nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_commitres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index eaa3a0cf38..bb8e2f6d7d 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -751,58 +751,26 @@ static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, return ret; } -__be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl) +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr) { - __be32 error; int host_error; - struct dentry *dentry; - struct inode *inode; - struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; - /* Get inode */ - error = 
fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); - if (error) - return error; - - dentry = fhp->fh_dentry; - inode = d_inode(dentry); + if (!acl) + return nfs_ok; - if (S_ISDIR(inode->i_mode)) + if (type == NF4DIR) flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl, + &attr->na_dpacl, flags); if (host_error == -EINVAL) return nfserr_attrnotsupp; - if (host_error < 0) - goto out_nfserr; - - fh_lock(fhp); - - host_error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, pacl); - if (host_error < 0) - goto out_drop_lock; - - if (S_ISDIR(inode->i_mode)) { - host_error = set_posix_acl(&init_user_ns, inode, - ACL_TYPE_DEFAULT, dpacl); - } - -out_drop_lock: - fh_unlock(fhp); - - posix_acl_release(pacl); - posix_acl_release(dpacl); -out_nfserr: - if (host_error == -EOPNOTSUPP) - return nfserr_attrnotsupp; else return nfserrno(host_error); } - static short ace2type(struct nfs4_ace *ace) { diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 97f517e9b4..4eae2c5af2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p) * 1 Protocol" */ +static void encode_uint32(struct xdr_stream *xdr, u32 n) +{ + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); +} + +static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, + size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); +} + /* * nfs_cb_opnum4 * @@ -121,7 +132,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh) BUG_ON(length > NFS4_FHSIZE); p = xdr_reserve_space(xdr, 4 + length); - xdr_encode_opaque(p, &fh->fh_base, length); + xdr_encode_opaque(p, &fh->fh_raw, length); } /* @@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, hdr->nops++; } +/* + * CB_RECALLANY4args + * + * struct CB_RECALLANY4args { + * uint32_t craa_objects_to_keep; + * bitmap4 
craa_type_mask; + * }; + */ +static void +encode_cb_recallany4args(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra) +{ + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY); + encode_uint32(xdr, ra->ra_keep); + encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval)); + hdr->nops++; +} + /* * CB_SEQUENCE4args * @@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_nops(&hdr); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static void +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfsd4_callback *cb = data; + struct nfsd4_cb_recall_any *ra; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb); + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_recallany4args(xdr, &hdr, ra); + encode_cb_nops(&hdr); +} /* * NFSv4.0 and NFSv4.1 XDR decode functions @@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } +/* + * 20.6. 
Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static int +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status); + return status; +} + #ifdef CONFIG_NFSD_PNFS /* * CB_LAYOUTRECALL4args @@ -679,7 +750,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * case NFS4_OK: * write_response4 coa_resok4; * default: - * length4 coa_bytes_copied; + * length4 coa_bytes_copied; * }; * struct CB_OFFLOAD4args { * nfs_fh4 coa_fh; @@ -688,21 +759,22 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * }; */ static void encode_offload_info4(struct xdr_stream *xdr, - __be32 nfserr, - const struct nfsd4_copy *cp) + const struct nfsd4_cb_offload *cbo) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = nfserr; - if (!nfserr) { + *p = cbo->co_nfserr; + switch (cbo->co_nfserr) { + case nfs_ok: p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); p = xdr_encode_empty_array(p); - p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written); - *p++ = cpu_to_be32(cp->cp_res.wr_stable_how); - p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data, + p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written); + *p++ = cpu_to_be32(cbo->co_res.wr_stable_how); + p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data, NFS4_VERIFIER_SIZE); - } else { + break; + default: p = xdr_reserve_space(xdr, 8); /* We always return success if bytes were written */ p = xdr_encode_hyper(p, 0); @@ -710,18 +782,16 @@ static void encode_offload_info4(struct xdr_stream *xdr, } static void encode_cb_offload4args(struct xdr_stream *xdr, - __be32 nfserr, - const struct knfsd_fh *fh, 
- const struct nfsd4_copy *cp, + const struct nfsd4_cb_offload *cbo, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = cpu_to_be32(OP_CB_OFFLOAD); - encode_nfs_fh4(xdr, fh); - encode_stateid4(xdr, &cp->cp_res.cb_stateid); - encode_offload_info4(xdr, nfserr, cp); + *p = cpu_to_be32(OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, &cbo->co_fh); + encode_stateid4(xdr, &cbo->co_res.cb_stateid); + encode_offload_info4(xdr, cbo); hdr->nops++; } @@ -731,8 +801,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, const void *data) { const struct nfsd4_callback *cb = data; - const struct nfsd4_copy *cp = - container_of(cb, struct nfsd4_copy, cp_cb); + const struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); struct nfs4_cb_compound_hdr hdr = { .ident = 0, .minorversion = cb->cb_clp->cl_minorversion, @@ -740,7 +810,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, encode_cb_compound4args(xdr, &hdr); encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr); + encode_cb_offload4args(xdr, cbo, &hdr); encode_cb_nops(&hdr); } @@ -784,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; @@ -1374,11 +1445,21 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_holds_slot = false; } -void nfsd4_run_cb(struct nfsd4_callback *cb) +/** + * nfsd4_run_cb - queue up a callback job to run + * @cb: callback to queue + * + * Kick off a callback to do its thing. Returns false if it was already + * on a queue, true otherwise. 
+ */ +bool nfsd4_run_cb(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; + bool queued; nfsd41_cb_inflight_begin(clp); - if (!nfsd4_queue_cb(cb)) + queued = nfsd4_queue_cb(cb); + if (!queued) nfsd41_cb_inflight_end(clp); + return queued; } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index f92161ce1f..5e9809aff3 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -41,6 +41,7 @@ #include "idmap.h" #include "nfsd.h" #include "netns.h" +#include "vfs.h" /* * Turn off idmapping when using AUTH_SYS. @@ -82,8 +83,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) new->id = itm->id; new->type = itm->type; - strlcpy(new->name, itm->name, sizeof(new->name)); - strlcpy(new->authname, itm->authname, sizeof(new->authname)); + strscpy(new->name, itm->name, sizeof(new->name)); + strscpy(new->authname, itm->authname, sizeof(new->authname)); } static void @@ -548,7 +549,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return nfserr_badowner; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; - strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item); if (ret == -ENOENT) return nfserr_badowner; @@ -584,7 +585,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, int ret; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) return encode_ascii_id(xdr, id); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2673019d30..e4e23b2a3e 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -421,7 +421,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct 
nfs4_layout_stateid *ls) new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL); if (!new) return nfserr_jukebox; - memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg)); + memcpy(&new->lo_seg, seg, sizeof(new->lo_seg)); new->lo_state = ls; spin_lock(&fp->fi_lock); @@ -657,7 +657,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ktime_t now, cutoff; const struct nfsd4_layout_ops *ops; - + trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task); switch (task->tk_status) { case 0: case -NFS4ERR_DELAY: diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f71af990e1..e0ff221286 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -37,6 +37,9 @@ #include #include #include +#include +#include + #include #include @@ -62,36 +65,6 @@ MODULE_PARM_DESC(nfsd4_ssc_umount_timeout, "idle msecs before unmount export from source server"); #endif -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#include - -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ - struct inode *inode = d_inode(resfh->fh_dentry); - int status; - - inode_lock(inode); - status = security_inode_setsecctx(resfh->fh_dentry, - label->data, label->len); - inode_unlock(inode); - - if (status) - /* - * XXX: We should really fail the whole open, but we may - * already have created a new file, so it may be too - * late. For now this seems the least of evils: - */ - bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - - return; -} -#else -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ } -#endif - #define NFSDDBG_FACILITY NFSDDBG_PROC static u32 nfsd_attrmask[] = { @@ -156,26 +129,6 @@ is_create_with_attrs(struct nfsd4_open *open) || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); } -/* - * if error occurs when setting the acl, just clear the acl bit - * in the returned attr bitmap. 
- */ -static void -do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl, u32 *bmval) -{ - __be32 status; - - status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); - if (status) - /* - * We should probably fail the whole open at this point, - * but we've already created the file, so it's too late; - * So this seems the least of evils: - */ - bmval[0] &= ~FATTR4_WORD0_ACL; -} - static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -189,7 +142,6 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) static __be32 do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode) { - __be32 status; if (open->op_truncate && !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) @@ -204,9 +156,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs if (open->op_share_deny & NFS4_SHARE_DENY_READ) accmode |= NFSD_MAY_WRITE; - status = fh_verify(rqstp, current_fh, S_IFREG, accmode); - - return status; + return fh_verify(rqstp, current_fh, S_IFREG, accmode); } static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) @@ -235,6 +185,202 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate &resfh->fh_handle); } +static inline bool nfsd4_create_is_exclusive(int createmode) +{ + return createmode == NFS4_CREATE_EXCLUSIVE || + createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + +static __be32 +nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child, + struct nfsd4_open *open) +{ + struct file *filp; + struct path path; + int oflags; + + oflags = O_CREAT | O_LARGEFILE; + switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) { + case NFS4_SHARE_ACCESS_WRITE: + oflags |= O_WRONLY; + break; + case NFS4_SHARE_ACCESS_BOTH: + oflags |= O_RDWR; + break; + default: + oflags |= O_RDONLY; + } + + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = child; + filp = dentry_create(&path, oflags, open->op_iattr.ia_mode, + current_cred()); + if (IS_ERR(filp)) + 
return nfserrno(PTR_ERR(filp)); + + open->op_filp = filp; + return nfs_ok; +} + +/* + * Implement NFSv4's unchecked, guarded, and exclusive create + * semantics for regular files. Open state for this new file is + * subsequently fabricated in nfsd4_process_open2(). + * + * Upon return, caller must release @fhp and @resfhp. + */ +static __be32 +nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd4_open *open) +{ + struct iattr *iap = &open->op_iattr; + struct nfsd_attrs attrs = { + .na_iattr = iap, + .na_seclabel = &open->op_label, + }; + struct dentry *parent, *child; + __u32 v_mtime, v_atime; + struct inode *inode; + __be32 status; + int host_err; + + if (isdotent(open->op_fname, open->op_fnamelen)) + return nfserr_exist; + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (status != nfs_ok) + return status; + parent = fhp->fh_dentry; + inode = d_inode(parent); + + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + + if (is_create_with_attrs(open)) + nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs); + + inode_lock_nested(inode, I_MUTEX_PARENT); + + child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); + if (IS_ERR(child)) { + status = nfserrno(PTR_ERR(child)); + goto out; + } + + if (d_really_is_negative(child)) { + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + if (status != nfs_ok) + goto out; + } + + status = fh_compose(resfhp, fhp->fh_export, child, fhp); + if (status != nfs_ok) + goto out; + + v_mtime = 0; + v_atime = 0; + if (nfsd4_create_is_exclusive(open->op_createmode)) { + u32 *verifier = (u32 *)open->op_verf.data; + + /* + * Solaris 7 gets confused (bugid 4218508) if these have + * the high bit set, as do xfs filesystems without the + * "bigtime" feature. So just clear the high bits. 
If this + * is ever changed to use different attrs for storing the + * verifier, then do_open_lookup() will also need to be + * fixed accordingly. + */ + v_mtime = verifier[0] & 0x7fffffff; + v_atime = verifier[1] & 0x7fffffff; + } + + if (d_really_is_positive(child)) { + status = nfs_ok; + + /* NFSv4 protocol requires change attributes even though + * no change happened. + */ + fh_fill_both_attrs(fhp); + + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + if (!d_is_reg(child)) + break; + + /* + * In NFSv4, we don't want to truncate the file + * now. This would be wrong if the OPEN fails for + * some other reason. Furthermore, if the size is + * nonzero, we should ignore it according to spec! + */ + open->op_truncate = (iap->ia_valid & ATTR_SIZE) && + !iap->ia_size; + break; + case NFS4_CREATE_GUARDED: + status = nfserr_exist; + break; + case NFS4_CREATE_EXCLUSIVE: + if (d_inode(child)->i_mtime.tv_sec == v_mtime && + d_inode(child)->i_atime.tv_sec == v_atime && + d_inode(child)->i_size == 0) { + open->op_created = true; + break; /* subtle */ + } + status = nfserr_exist; + break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (d_inode(child)->i_mtime.tv_sec == v_mtime && + d_inode(child)->i_atime.tv_sec == v_atime && + d_inode(child)->i_size == 0) { + open->op_created = true; + goto set_attr; /* subtle */ + } + status = nfserr_exist; + } + goto out; + } + + if (!IS_POSIXACL(inode)) + iap->ia_mode &= ~current_umask(); + + fh_fill_pre_attrs(fhp); + status = nfsd4_vfs_create(fhp, child, open); + if (status != nfs_ok) + goto out; + open->op_created = true; + fh_fill_post_attrs(fhp); + + /* A newly created file already has a file size of zero. 
*/ + if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) + iap->ia_valid &= ~ATTR_SIZE; + if (nfsd4_create_is_exclusive(open->op_createmode)) { + iap->ia_valid = ATTR_MTIME | ATTR_ATIME | + ATTR_MTIME_SET|ATTR_ATIME_SET; + iap->ia_mtime.tv_sec = v_mtime; + iap->ia_atime.tv_sec = v_atime; + iap->ia_mtime.tv_nsec = 0; + iap->ia_atime.tv_nsec = 0; + } + +set_attr: + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); + + if (attrs.na_labelerr) + open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (attrs.na_aclerr) + open->op_bmval[0] &= ~FATTR4_WORD0_ACL; +out: + inode_unlock(inode); + nfsd_attrs_free(&attrs); + if (child && !IS_ERR(child)) + dput(child); + fh_drop_write(fhp); + return status; +} + static __be32 do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { @@ -264,47 +410,33 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru * yes | yes | GUARDED4 | GUARDED4 */ - /* - * Note: create modes (UNCHECKED,GUARDED...) are the same - * in NFSv4 as in v3 except EXCLUSIVE4_1. - */ current->fs->umask = open->op_umask; - status = do_nfsd_create(rqstp, current_fh, open->op_fname, - open->op_fnamelen, &open->op_iattr, - *resfh, open->op_createmode, - (u32 *)open->op_verf.data, - &open->op_truncate, &open->op_created); + status = nfsd4_create_file(rqstp, current_fh, *resfh, open); current->fs->umask = 0; - if (!status && open->op_label.len) - nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); - /* * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 * use the returned bitmask to indicate which attributes * we used to store the verifier: */ - if (nfsd_create_is_exclusive(open->op_createmode) && status == 0) + if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0) open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY); - } else - /* - * Note this may exit with the parent still locked. 
- * We will hold the lock until nfsd4_open's final - * lookup, to prevent renames or unlinks until we've had - * a chance to an acquire a delegation if appropriate. - */ + } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname, open->op_fnamelen, *resfh); + if (!status) + /* NFSv4 protocol requires change attributes even though + * no change happened. + */ + fh_fill_both_attrs(current_fh); + } if (status) goto out; status = nfsd_check_obj_isreg(*resfh); if (status) goto out; - if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); - nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); accmode = NFSD_MAY_NOP; if (open->op_created || @@ -320,7 +452,6 @@ static __be32 do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { struct svc_fh *current_fh = &cstate->current_fh; - __be32 status; int accmode = 0; /* We don't know the target directory, and therefore can not @@ -345,9 +476,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, accmode); - - return status; + return do_open_permission(rqstp, current_fh, open, accmode); } static void @@ -375,6 +504,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, (int)open->op_fnamelen, open->op_fname, open->op_openowner); + open->op_filp = NULL; + open->op_rqstp = rqstp; + /* This check required by spec. 
*/ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; @@ -427,50 +559,46 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; switch (open->op_claim_type) { - case NFS4_OPEN_CLAIM_DELEGATE_CUR: - case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, cstate, open, &resfh); - if (status) - goto out; - break; - case NFS4_OPEN_CLAIM_PREVIOUS: - status = nfs4_check_open_reclaim(cstate->clp); - if (status) - goto out; - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - reclaim = true; - fallthrough; - case NFS4_OPEN_CLAIM_FH: - case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - status = do_open_fhandle(rqstp, cstate, open); - if (status) - goto out; - resfh = &cstate->current_fh; - break; - case NFS4_OPEN_CLAIM_DELEG_PREV_FH: - case NFS4_OPEN_CLAIM_DELEGATE_PREV: - dprintk("NFSD: unsupported OPEN claim type %d\n", - open->op_claim_type); - status = nfserr_notsupp; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_NULL: + status = do_open_lookup(rqstp, cstate, open, &resfh); + if (status) goto out; - default: - dprintk("NFSD: Invalid OPEN claim type %d\n", - open->op_claim_type); - status = nfserr_inval; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + status = nfs4_check_open_reclaim(cstate->clp); + if (status) goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + reclaim = true; + fallthrough; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + status = do_open_fhandle(rqstp, cstate, open); + if (status) + goto out; + resfh = &cstate->current_fh; + break; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + status = nfserr_notsupp; + goto out; + default: + status = nfserr_inval; + goto out; } - /* - * nfsd4_process_open2() does the actual opening of the file. If - * successful, it (1) truncates the file if open->op_truncate was - * set, (2) sets open->op_stateid, (3) sets open->op_delegation. 
- */ + status = nfsd4_process_open2(rqstp, resfh, open); - WARN(status && open->op_created, - "nfsd4_process_open2 failed to open newly-created file! status=%u\n", - be32_to_cpu(status)); + if (status && open->op_created) + pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n", + be32_to_cpu(status)); if (reclaim && !status) nn->somebody_reclaimed = true; out: + if (open->op_filp) { + fput(open->op_filp); + open->op_filp = NULL; + } if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); fh_put(resfh); @@ -519,7 +647,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; - memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, + memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval, putfh->pf_fhlen); ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); #ifdef CONFIG_NFSD_V4_2_INTER_SSC @@ -535,11 +663,9 @@ static __be32 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - __be32 status; - fh_put(&cstate->current_fh); - status = exp_pseudoroot(rqstp, &cstate->current_fh); - return status; + + return exp_pseudoroot(rqstp, &cstate->current_fh); } static __be32 @@ -598,7 +724,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data)); - nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id)); + nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id)); } static __be32 @@ -606,10 +732,19 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_commit *commit = &u->commit; + struct nfsd_file *nf; + __be32 status; + + status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (status != nfs_ok) + return status; - return nfsd_commit(rqstp, 
&cstate->current_fh, commit->co_offset, + status = nfsd_commit(rqstp, &cstate->current_fh, nf, commit->co_offset, commit->co_count, (__be32 *)commit->co_verf.data); + nfsd_file_put(nf); + return status; } static __be32 @@ -617,6 +752,10 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_create *create = &u->create; + struct nfsd_attrs attrs = { + .na_iattr = &create->cr_iattr, + .na_seclabel = &create->cr_label, + }; struct svc_fh resfh; __be32 status; dev_t rdev; @@ -632,12 +771,13 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs); current->fs->umask = create->cr_umask; switch (create->cr_type) { case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_data, &resfh); + create->cr_data, &attrs, &resfh); break; case NF4BLK: @@ -648,7 +788,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFBLK, rdev, &resfh); + &attrs, S_IFBLK, rdev, &resfh); break; case NF4CHR: @@ -659,26 +799,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr,S_IFCHR, rdev, &resfh); + &attrs, S_IFCHR, rdev, &resfh); break; case NF4SOCK: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFSOCK, 0, &resfh); + &attrs, S_IFSOCK, 0, &resfh); break; case NF4FIFO: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFIFO, 0, &resfh); + &attrs, S_IFIFO, 0, &resfh); break; case NF4DIR: create->cr_iattr.ia_valid &= ~ATTR_SIZE; status = nfsd_create(rqstp, 
&cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFDIR, 0, &resfh); + &attrs, S_IFDIR, 0, &resfh); break; default: @@ -688,20 +828,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (create->cr_label.len) - nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); - - if (create->cr_acl != NULL) - do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, - create->cr_bmval); - - fh_unlock(&cstate->current_fh); + if (attrs.na_labelerr) + create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (attrs.na_aclerr) + create->cr_bmval[0] &= ~FATTR4_WORD0_ACL; set_change_info(&create->cr_cinfo, &cstate->current_fh); fh_dup2(&cstate->current_fh, &resfh); out: fh_put(&resfh); out_umask: current->fs->umask = 0; + nfsd_attrs_free(&attrs); return status; } @@ -807,12 +944,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, &read->rd_nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); - goto out; - } - status = nfs_ok; -out: + read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -874,10 +1006,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); - if (!status) { - fh_unlock(&cstate->current_fh); + if (!status) set_change_info(&remove->rm_cinfo, &cstate->current_fh); - } return status; } @@ -917,7 +1047,6 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &exp, &dentry); if (err) return err; - fh_unlock(&cstate->current_fh); if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; @@ -972,17 +1101,21 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_setattr *setattr 
= &u->setattr; + struct nfsd_attrs attrs = { + .na_iattr = &setattr->sa_iattr, + .na_seclabel = &setattr->sa_label, + }; + struct inode *inode; __be32 status = nfs_ok; + bool save_no_wcc; int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, WR_STATE, NULL, NULL); - if (status) { - dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); + if (status) return status; - } } err = fh_want_write(&cstate->current_fh); if (err) @@ -994,19 +1127,23 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (setattr->sa_acl != NULL) - status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, - setattr->sa_acl); - if (status) - goto out; - if (setattr->sa_label.len) - status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, - &setattr->sa_label); + inode = cstate->current_fh.fh_dentry->d_inode; + status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? NF4DIR : NF4REG, + setattr->sa_acl, &attrs); + if (status) goto out; - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, + save_no_wcc = cstate->current_fh.fh_no_wcc; + cstate->current_fh.fh_no_wcc = true; + status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, 0, (time64_t)0); + cstate->current_fh.fh_no_wcc = save_no_wcc; + if (!status) + status = nfserrno(attrs.na_labelerr); + if (!status) + status = nfserrno(attrs.na_aclerr); out: + nfsd_attrs_free(&attrs); fh_drop_write(&cstate->current_fh); return status; } @@ -1031,10 +1168,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + if (status) return status; - } write->wr_how_written = write->wr_stable_how; @@ -1065,17 +1200,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct 
nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); + if (status) goto out; - } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, dst_stateid, WR_STATE, dst, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); + if (status) goto out_put_src; - } /* fix up for NFS-specific error code */ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) || @@ -1108,7 +1239,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd4_clone_file_range(src, clone->cl_src_pos, + status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos, dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); @@ -1118,30 +1249,17 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } -void nfs4_put_copy(struct nfsd4_copy *copy) +static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) return; + kfree(copy->cp_src); kfree(copy); } -static bool -check_and_set_stop_copy(struct nfsd4_copy *copy) -{ - bool value; - - spin_lock(©->cp_clp->async_lock); - value = copy->stopped; - if (!copy->stopped) - copy->stopped = true; - spin_unlock(©->cp_clp->async_lock); - return value; -} - static void nfsd4_stop_copy(struct nfsd4_copy *copy) { - /* only 1 thread should stop the copy */ - if (!check_and_set_stop_copy(copy)) + if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) kthread_stop(copy->copy_task); nfs4_put_copy(copy); } @@ -1182,15 +1300,15 @@ extern void nfs_sb_deactive(struct super_block *sb); * setup a work entry in the ssc delayed unmount list. 
*/ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, - struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt) + struct nfsd4_ssc_umount_item **nsui) { - struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *work = NULL; struct nfsd4_ssc_umount_item *tmp; DEFINE_WAIT(wait); + __be32 status = 0; - *ss_mnt = NULL; - *retwork = NULL; + *nsui = NULL; work = kzalloc(sizeof(*work), GFP_KERNEL); try_again: spin_lock(&nn->nfsd_ssc_lock); @@ -1200,13 +1318,12 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, /* found a match */ if (ni->nsui_busy) { /* wait - and try again */ - prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, - TASK_INTERRUPTIBLE); + prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE); spin_unlock(&nn->nfsd_ssc_lock); /* allow 20secs for mount/unmount for now - revisit */ - if (signal_pending(current) || - (schedule_timeout(20*HZ) == 0)) { + if (kthread_should_stop() || + (freezable_schedule_timeout(20*HZ) == 0)) { finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); return nfserr_eagain; @@ -1214,44 +1331,45 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, finish_wait(&nn->nfsd_ssc_waitq, &wait); goto try_again; } - *ss_mnt = ni->nsui_vfsmount; + *nsui = ni; refcount_inc(&ni->nsui_refcnt); spin_unlock(&nn->nfsd_ssc_lock); kfree(work); - /* return vfsmount in ss_mnt */ + /* return vfsmount in (*nsui)->nsui_vfsmount */ return 0; } if (work) { - strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr)); + strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); refcount_set(&work->nsui_refcnt, 2); work->nsui_busy = true; list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); - *retwork = work; - } + *nsui = work; + } else + status = nfserr_resource; spin_unlock(&nn->nfsd_ssc_lock); - return 0; + return status; } -static void nfsd4_ssc_update_dul_work(struct nfsd_net *nn, - struct nfsd4_ssc_umount_item *work, struct 
vfsmount *ss_mnt) +static void nfsd4_ssc_update_dul(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *nsui, + struct vfsmount *ss_mnt) { - /* set nsui_vfsmount, clear busy flag and wakeup waiters */ spin_lock(&nn->nfsd_ssc_lock); - work->nsui_vfsmount = ss_mnt; - work->nsui_busy = false; + nsui->nsui_vfsmount = ss_mnt; + nsui->nsui_busy = false; wake_up_all(&nn->nfsd_ssc_waitq); spin_unlock(&nn->nfsd_ssc_lock); } -static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn, - struct nfsd4_ssc_umount_item *work) +static void nfsd4_ssc_cancel_dul(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *nsui) { spin_lock(&nn->nfsd_ssc_lock); - list_del(&work->nsui_list); + list_del(&nsui->nsui_list); wake_up_all(&nn->nfsd_ssc_waitq); spin_unlock(&nn->nfsd_ssc_lock); - kfree(work); + kfree(nsui); } /* @@ -1259,7 +1377,7 @@ static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn, */ static __be32 nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, - struct vfsmount **mount) + struct nfsd4_ssc_umount_item **nsui) { struct file_system_type *type; struct vfsmount *ss_mnt; @@ -1270,7 +1388,6 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, char *ipaddr, *dev_name, *raw_data; int len, raw_len; __be32 status = nfserr_inval; - struct nfsd4_ssc_umount_item *work = NULL; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); naddr = &nss->u.nl4_addr; @@ -1278,6 +1395,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, naddr->addr_len, (struct sockaddr *)&tmp_addr, sizeof(tmp_addr)); + *nsui = NULL; if (tmp_addrlen == 0) goto out_err; @@ -1320,10 +1438,10 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, goto out_free_rawdata; snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); - status = nfsd4_ssc_setup_dul(nn, ipaddr, &work, &ss_mnt); + status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui); if (status) goto out_free_devname; - if (ss_mnt) + if ((*nsui)->nsui_vfsmount) 
goto out_done; /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ @@ -1331,15 +1449,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, module_put(type->owner); if (IS_ERR(ss_mnt)) { status = nfserr_nodev; - if (work) - nfsd4_ssc_cancel_dul_work(nn, work); + nfsd4_ssc_cancel_dul(nn, *nsui); goto out_free_devname; } - if (work) - nfsd4_ssc_update_dul_work(nn, work, ss_mnt); + nfsd4_ssc_update_dul(nn, *nsui, ss_mnt); out_done: status = 0; - *mount = ss_mnt; out_free_devname: kfree(dev_name); @@ -1363,7 +1478,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - struct nfsd4_copy *copy, struct vfsmount **mount) + struct nfsd4_copy *copy) { struct svc_fh *s_fh = NULL; stateid_t *s_stid = ©->cp_src_stateid; @@ -1376,14 +1491,14 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, if (status) goto out; - status = nfsd4_interssc_connect(©->cp_src, rqstp, mount); + status = nfsd4_interssc_connect(copy->cp_src, rqstp, ©->ss_nsui); if (status) goto out; s_fh = &cstate->save_fh; copy->c_fh.size = s_fh->fh_handle.fh_size; - memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size); copy->stateid.seqid = cpu_to_be32(s_stid->si_generation); memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, sizeof(stateid_opaque_t)); @@ -1394,45 +1509,26 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp, struct nfsd_file *dst) { - bool found = false; - long timeout; - struct nfsd4_ssc_umount_item *tmp; - struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); + long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); - nfs42_ssc_close(src->nf_file); - 
nfsd_file_put(dst); - fput(src->nf_file); + nfs42_ssc_close(filp); + fput(filp); - if (!nn) { - mntput(ss_mnt); - return; - } spin_lock(&nn->nfsd_ssc_lock); - timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); - list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { - if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) { - list_del(&ni->nsui_list); - /* - * vfsmount can be shared by multiple exports, - * decrement refcnt. If the count drops to 1 it - * will be unmounted when nsui_expire expires. - */ - refcount_dec(&ni->nsui_refcnt); - ni->nsui_expire = jiffies + timeout; - list_add_tail(&ni->nsui_list, &nn->nfsd_ssc_mount_list); - found = true; - break; - } - } + list_del(&nsui->nsui_list); + /* + * vfsmount can be shared by multiple exports, + * decrement refcnt. If the count drops to 1 it + * will be unmounted when nsui_expire expires. + */ + refcount_dec(&nsui->nsui_refcnt); + nsui->nsui_expire = jiffies + timeout; + list_add_tail(&nsui->nsui_list, &nn->nfsd_ssc_mount_list); spin_unlock(&nn->nfsd_ssc_lock); - if (!found) { - mntput(ss_mnt); - return; - } } #else /* CONFIG_NFSD_V4_2_INTER_SSC */ @@ -1440,15 +1536,13 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - struct nfsd4_copy *copy, - struct vfsmount **mount) + struct nfsd4_copy *copy) { - *mount = NULL; return nfserr_inval; } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp, struct nfsd_file *dst) { } @@ -1471,23 +1565,21 @@ nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, ©->nf_dst); } -static void -nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) -{ - nfsd_file_put(src); - nfsd_file_put(dst); -} - static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { - struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); 
+ struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); - nfs4_put_copy(copy); + kfree(cbo); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, struct rpc_task *task) { + struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); + + trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task); return 1; } @@ -1499,15 +1591,16 @@ static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) { copy->cp_res.wr_stable_how = - copy->committed ? NFS_FILE_SYNC : NFS_UNSTABLE; - copy->cp_synchronous = sync; + test_bit(NFSD4_COPY_F_COMMITTED, ©->cp_flags) ? + NFS_FILE_SYNC : NFS_UNSTABLE; + nfsd4_copy_set_sync(copy, sync); gen_boot_verifier(©->cp_res.wr_verifier, copy->cp_clp->net); } -static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) +static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, + struct file *dst, + struct file *src) { - struct file *dst = copy->nf_dst->nf_file; - struct file *src = copy->nf_src->nf_file; errseq_t since; ssize_t bytes_copied = 0; u64 bytes_total = copy->cp_count; @@ -1530,26 +1623,29 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) copy->cp_res.wr_bytes_written += bytes_copied; src_pos += bytes_copied; dst_pos += bytes_copied; - } while (bytes_total > 0 && !copy->cp_synchronous); + } while (bytes_total > 0 && nfsd4_copy_is_async(copy)); /* for a non-zero asynchronous copy do a commit of data */ - if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) { + if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) { since = READ_ONCE(dst->f_wb_err); end = copy->cp_dst_pos + copy->cp_res.wr_bytes_written - 1; status = vfs_fsync_range(dst, copy->cp_dst_pos, end, 0); if (!status) status = filemap_check_wb_err(dst->f_mapping, since); if (!status) - copy->committed = true; + set_bit(NFSD4_COPY_F_COMMITTED, ©->cp_flags); } return bytes_copied; } -static __be32 
nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) +static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, + struct file *src, struct file *dst, + bool sync) { __be32 status; ssize_t bytes; - bytes = _nfsd_copy_file_range(copy); + bytes = _nfsd_copy_file_range(copy, dst, src); + /* for async copy, we ignore the error, client can always retry * to get the error */ @@ -1559,13 +1655,6 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) nfsd4_init_copy_res(copy, sync); status = nfs_ok; } - - if (!copy->cp_intra) /* Inter server SSC */ - nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, - copy->nf_dst); - else - nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); - return status; } @@ -1574,73 +1663,100 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; dst->cp_count = src->cp_count; - dst->cp_synchronous = src->cp_synchronous; + dst->cp_flags = src->cp_flags; memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - dst->cp_intra = src->cp_intra; - if (src->cp_intra) /* for inter, file_src doesn't exist yet */ + /* for inter, nf_src doesn't exist yet */ + if (!nfsd4_ssc_is_inter(src)) dst->nf_src = nfsd_file_get(src->nf_src); memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); - memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); + memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); - dst->ss_mnt = src->ss_mnt; + dst->ss_nsui = src->ss_nsui; +} + +static void release_copy_files(struct nfsd4_copy *copy) +{ + if (copy->nf_src) + nfsd_file_put(copy->nf_src); + if (copy->nf_dst) + nfsd_file_put(copy->nf_dst); } static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); - 
nfsd_file_put(copy->nf_dst); - if (copy->cp_intra) - nfsd_file_put(copy->nf_src); - spin_lock(©->cp_clp->async_lock); - list_del(©->copies); - spin_unlock(©->cp_clp->async_lock); + release_copy_files(copy); + if (copy->cp_clp) { + spin_lock(©->cp_clp->async_lock); + if (!list_empty(©->copies)) + list_del_init(©->copies); + spin_unlock(©->cp_clp->async_lock); + } nfs4_put_copy(copy); } +static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) +{ + struct nfsd4_cb_offload *cbo; + + cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); + if (!cbo) + return; + + memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); + memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); + cbo->co_nfserr = nfserr; + + nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, + NFSPROC4_CLNT_CB_OFFLOAD); + trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, + &cbo->co_fh, copy->cp_count, nfserr); + nfsd4_run_cb(&cbo->co_cb); +} + +/** + * nfsd4_do_async_copy - kthread function for background server-side COPY + * @data: arguments for COPY operation + * + * Return values: + * %0: Copy operation is done. 
+ */ static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; - struct nfsd4_copy *cb_copy; + __be32 nfserr; - if (!copy->cp_intra) { /* Inter server SSC */ - copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); - if (!copy->nf_src) { - copy->nfserr = nfserr_serverfault; - /* ss_mnt will be unmounted by the laundromat */ - goto do_callback; - } - copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, - ©->stateid); - if (IS_ERR(copy->nf_src->nf_file)) { - copy->nfserr = nfserr_offload_denied; + if (nfsd4_ssc_is_inter(copy)) { + struct file *filp; + + filp = nfs42_ssc_open(copy->ss_nsui->nsui_vfsmount, + ©->c_fh, ©->stateid); + if (IS_ERR(filp)) { + switch (PTR_ERR(filp)) { + case -EBADF: + nfserr = nfserr_wrong_type; + break; + default: + nfserr = nfserr_offload_denied; + } /* ss_mnt will be unmounted by the laundromat */ goto do_callback; } + nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, + false); + nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst); + } else { + nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); } - copy->nfserr = nfsd4_do_copy(copy, 0); do_callback: - cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); - if (!cb_copy) - goto out; - refcount_set(&cb_copy->refcount, 1); - memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); - cb_copy->cp_clp = copy->cp_clp; - cb_copy->nfserr = copy->nfserr; - memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh)); - nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, - &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); - trace_nfsd_cb_offload(copy->cp_clp, ©->cp_res.cb_stateid, - ©->fh, copy->cp_count, copy->nfserr); - nfsd4_run_cb(&cb_copy->cp_cb); -out: - if (!copy->cp_intra) - kfree(copy->nf_src); + nfsd4_send_cb_offload(copy, nfserr); cleanup_async_copy(copy); return 0; } @@ -1653,13 +1769,12 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy 
*async_copy = NULL; - if (!copy->cp_intra) { /* Inter server SSC */ - if (!inter_copy_offload_enable || copy->cp_synchronous) { + if (nfsd4_ssc_is_inter(copy)) { + if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) { status = nfserr_notsupp; goto out; } - status = nfsd4_setup_inter_ssc(rqstp, cstate, copy, - ©->ss_mnt); + status = nfsd4_setup_inter_ssc(rqstp, cstate, copy); if (status) return nfserr_offload_denied; } else { @@ -1671,17 +1786,21 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy->cp_clp = cstate->clp; memcpy(©->fh, &cstate->current_fh.fh_handle, sizeof(struct knfsd_fh)); - if (!copy->cp_synchronous) { + if (nfsd4_copy_is_async(copy)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out_err; + INIT_LIST_HEAD(&async_copy->copies); + refcount_set(&async_copy->refcount, 1); + async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); + if (!async_copy->cp_src) + goto out_err; if (!nfs4_init_copy_state(nn, copy)) goto out_err; - refcount_set(&async_copy->refcount, 1); - memcpy(©->cp_res.cb_stateid, ©->cp_stateid.stid, + memcpy(©->cp_res.cb_stateid, ©->cp_stateid.cs_stid, sizeof(copy->cp_res.cb_stateid)); dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, @@ -1695,18 +1814,24 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, wake_up_process(async_copy->copy_task); status = nfs_ok; } else { - status = nfsd4_do_copy(copy, 1); + status = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, true); } out: + release_copy_files(copy); return status; out_err: + if (nfsd4_ssc_is_inter(copy)) { + /* + * Source's vfsmount of inter-copy will be unmounted + * by the laundromat. Use copy instead of async_copy + * since async_copy->ss_nsui might not be set yet. 
+ */ + refcount_dec(©->ss_nsui->nsui_refcnt); + } if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - /* - * source's vfsmount of inter-copy will be unmounted - * by the laundromat - */ goto out; } @@ -1717,7 +1842,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid) spin_lock(&clp->async_lock); list_for_each_entry(copy, &clp->async_copies, copies) { - if (memcmp(©->cp_stateid.stid, stateid, NFS4_STATEID_SIZE)) + if (memcmp(©->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE)) continue; refcount_inc(©->refcount); spin_unlock(&clp->async_lock); @@ -1771,16 +1896,16 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cps = nfs4_alloc_init_cpntf_state(nn, stid); if (!cps) goto out; - memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t)); memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t)); memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t)); /* For now, only return one server address in cpn_src, the * address used by the client to connect to this server. 
*/ - cn->cpn_src.nl4_type = NL4_NETADDR; + cn->cpn_src->nl4_type = NL4_NETADDR; status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, - &cn->cpn_src.u.nl4_addr); + &cn->cpn_src->u.nl4_addr); WARN_ON_ONCE(status); if (status) { nfs4_put_cpntf_state(nn, cps); @@ -1801,10 +1926,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, WR_STATE, &nf, NULL); - if (status != nfs_ok) { - dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); + if (status != nfs_ok) return status; - } status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file, fallocate->falloc_offset, @@ -1860,10 +1983,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, RD_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); + if (status) return status; - } switch (seek->seek_whence) { case NFS4_CONTENT_DATA: @@ -2434,7 +2555,7 @@ check_if_stalefh_allowed(struct nfsd4_compoundargs *args) return; } putfh = (struct nfsd4_putfh *)&saved_op->u; - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) putfh->no_verify = true; } } @@ -2462,11 +2583,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) __be32 status; resp->xdr = &rqstp->rq_res_stream; + resp->statusp = resp->xdr->p; /* reserve space for: NFS status code */ xdr_reserve_space(resp->xdr, XDR_UNIT); - resp->tagp = resp->xdr->p; /* reserve space for: taglen, tag, and opcnt */ xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen); resp->taglen = args->taglen; @@ -2499,7 +2620,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) rqstp->rq_lease_breaker = (void **)&cstate->clp; - trace_nfsd_compound(rqstp, args->client_opcnt); + trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt); while (!status && resp->opcnt < args->opcnt) { op = 
&args->ops[resp->opcnt++]; @@ -2536,13 +2657,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } - fh_clear_wcc(current_fh); + fh_clear_pre_post_attrs(current_fh); /* If op is non-idempotent */ if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a - * succesful reply: + * successful reply: */ u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* @@ -2623,28 +2744,49 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) #define op_encode_channel_attrs_maxsz (6 + 1 + 1) -static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +/* + * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which + * is called before sunrpc sets rq_res.buflen. Thus we have to compute + * the maximum payload size here, based on transport limits and the size + * of the remaining space in the rq_pages array. + */ +static u32 nfsd4_max_payload(const struct svc_rqst *rqstp) +{ + u32 buflen; + + buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE; + buflen -= rqstp->rq_auth_slack; + buflen -= rqstp->rq_res.head[0].iov_len; + return min_t(u32, buflen, svc_max_payload(rqstp)); +} + +static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size) * sizeof(__be32); } -static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); } -static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { /* ac_supported, ac_resp_access */ return (op_encode_hdr_size + 2)* sizeof(__be32); } -static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp, + const 
struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -2655,17 +2797,17 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op * the op prematurely if the estimate is too large. We may turn off splice * reads unnecessarily. */ -static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 *bmap = op->u.getattr.ga_bmval; + const u32 *bmap = op->u.getattr.ga_bmval; u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; u32 ret = 0; if (bmap0 & FATTR4_WORD0_ACL) - return svc_max_payload(rqstp); + return nfsd4_max_payload(rqstp); if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) - return svc_max_payload(rqstp); + return nfsd4_max_payload(rqstp); if (bmap1 & FATTR4_WORD1_OWNER) { ret += IDMAP_NAMESZ + 4; @@ -2693,24 +2835,28 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, return ret; } -static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE; } -static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_lock_denied_maxsz) * 
sizeof(__be32); } -static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz + op_encode_change_info_maxsz + 1 @@ -2718,20 +2864,18 @@ static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + op_encode_delegation_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.read.rd_length, maxcount); + u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = svc_max_payload(rqstp); - u32 rlen = min(op->u.read.rd_length, maxcount); + u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); /* * If we detect that the file changed during hole encoding, then we * recover by encoding the remaining reply as data. 
This means we need @@ -2742,70 +2886,77 @@ static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.readdir.rd_maxcount, maxcount); + u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + op_encode_verifier_maxsz + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE; } -static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); } -static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids) * sizeof(__be32); } -static 
inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR * (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32); } -static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * sizeof(__be32); } -static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ 1 + 1 + /* eir_flags, spr_how */\ @@ -2819,14 +2970,16 @@ static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_o 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); } -static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); } -static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 
nfsd4_create_session_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ @@ -2835,7 +2988,8 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd op_encode_channel_attrs_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* wr_callback */ + @@ -2847,16 +3001,16 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1 /* cr_synchronous */) * sizeof(__be32); } -static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 /* osr_count */ + 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); } -static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 3 /* cnr_lease_time */ + @@ -2871,12 +3025,10 @@ static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, } #ifdef CONFIG_NFSD_PNFS -static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount); + u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 1 /* gd_layout_type*/ + @@ -2889,7 +3041,8 @@ static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4 * so we need to define an arbitrary upper bound here. 
*/ #define MAX_LAYOUT_SIZE 128 -static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* logr_return_on_close */ + @@ -2898,14 +3051,16 @@ static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op MAX_LAYOUT_SIZE) * sizeof(__be32); } -static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* locr_newsize */ + 2 /* ns_size */) * sizeof(__be32); } -static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* lrs_stateid */ + @@ -2914,41 +3069,36 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_ #endif /* CONFIG_NFSD_PNFS */ -static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 3) * sizeof(__be32); } -static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount, rlen; - - maxcount = svc_max_payload(rqstp); - rlen = min_t(u32, XATTR_SIZE_MAX, maxcount); + u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 
nfsd4_listxattrs_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount, rlen; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount); + u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); @@ -3437,6 +3587,7 @@ static const struct svc_procedure nfsd_procedures4[2] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 1, @@ -3447,6 +3598,7 @@ static const struct svc_procedure nfsd_procedures4[2] = { .pc_decode = nfs4svc_decode_compoundargs, .pc_encode = nfs4svc_encode_compoundres, .pc_argsize = sizeof(struct nfsd4_compoundargs), + .pc_argzero = offsetof(struct nfsd4_compoundargs, iops), .pc_ressize = sizeof(struct nfsd4_compoundres), .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 8f24485e0f..5d680045fa 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -807,17 +807,17 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; name.data = memdup_user(&ci->cc_name.cn_id, namelen); - if (IS_ERR_OR_NULL(name.data)) - return -EFAULT; + if (IS_ERR(name.data)) + return PTR_ERR(name.data); name.len = namelen; get_user(princhashlen, &ci->cc_princhash.cp_len); if (princhashlen > 0) { princhash.data = 
memdup_user( &ci->cc_princhash.cp_data, princhashlen); - if (IS_ERR_OR_NULL(princhash.data)) { + if (IS_ERR(princhash.data)) { kfree(name.data); - return -EFAULT; + return PTR_ERR(princhash.data); } princhash.len = princhashlen; } else @@ -829,8 +829,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &cnm->cn_len)) return -EFAULT; name.data = memdup_user(&cnm->cn_id, namelen); - if (IS_ERR_OR_NULL(name.data)) - return -EFAULT; + if (IS_ERR(name.data)) + return PTR_ERR(name.data); name.len = namelen; } if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9b660491f3..d07176eee9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,7 +44,9 @@ #include #include #include +#include #include + #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); +static void nfsd4_file_hash_remove(struct nfs4_file *fi); /* Locking: */ @@ -125,6 +128,23 @@ static void free_session(struct nfsd4_session *); static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; +static struct workqueue_struct *laundry_wq; + +int nfsd4_create_laundry_wq(void) +{ + int rc = 0; + + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); + if (laundry_wq == NULL) + rc = -ENOMEM; + return rc; +} + +void nfsd4_destroy_laundry_wq(void) +{ + destroy_workqueue(laundry_wq); +} + static bool is_session_dead(struct nfsd4_session *ses) { return ses->se_flags & NFS4_SESSION_DEAD; @@ -143,6 +163,13 @@ static bool is_client_expired(struct nfs4_client *clp) return clp->cl_time == 0; } +static void nfsd4_dec_courtesy_client_count(struct nfsd_net 
*nn, + struct nfs4_client *clp) +{ + if (clp->cl_state != NFSD4_ACTIVE) + atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0); +} + static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -152,6 +179,8 @@ static __be32 get_client_locked(struct nfs4_client *clp) if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_rpc_users); + nfsd4_dec_courtesy_client_count(nn, clp); + clp->cl_state = NFSD4_ACTIVE; return nfs_ok; } @@ -172,6 +201,8 @@ renew_client_locked(struct nfs4_client *clp) list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); + nfsd4_dec_courtesy_client_count(nn, clp); + clp->cl_state = NFSD4_ACTIVE; } static void put_client_renew_locked(struct nfs4_client *clp) @@ -246,6 +277,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); + WARN_ON(list_empty(&cur->nbl_lru)); list_del_init(&cur->nbl_lru); found = cur; break; @@ -271,6 +303,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); + kref_init(&nbl->nbl_kref); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, &nfsd4_cb_notify_lock_ops, NFSPROC4_CLNT_CB_NOTIFY_LOCK); @@ -280,13 +313,22 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, } static void -free_blocked_lock(struct nfsd4_blocked_lock *nbl) +free_nbl(struct kref *kref) { - locks_delete_block(&nbl->nbl_lock); + struct nfsd4_blocked_lock *nbl; + + nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref); locks_release_private(&nbl->nbl_lock); kfree(nbl); } +static void +free_blocked_lock(struct nfsd4_blocked_lock *nbl) +{ + locks_delete_block(&nbl->nbl_lock); + kref_put(&nbl->nbl_kref, free_nbl); +} + static void remove_blocked_locks(struct 
nfs4_lockowner *lo) { @@ -302,6 +344,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo) struct nfsd4_blocked_lock, nbl_list); list_del_init(&nbl->nbl_list); + WARN_ON(list_empty(&nbl->nbl_lru)); list_move(&nbl->nbl_lru, &reaplist); } spin_unlock(&nn->blocked_locks_lock); @@ -326,6 +369,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { + trace_nfsd_cb_notify_lock_done(&zero_stateid, task); + /* * Since this is just an optimization, we don't try very hard if it * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and @@ -360,11 +405,13 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { * st_{access,deny}_bmap field of the stateid, in order to track not * only what share bits are currently in force, but also what * combinations of share bits previous opens have used. This allows us - * to enforce the recommendation of rfc 3530 14.2.19 that the server - * return an error if the client attempt to downgrade to a combination - * of share bits not explicable by closing some of its previous opens. + * to enforce the recommendation in + * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that + * the server return an error if the client attempt to downgrade to a + * combination of share bits not explicable by closing some of its + * previous opens. * - * XXX: This enforcement is actually incomplete, since we don't keep + * This enforcement is arguably incomplete, since we don't keep * track of access/deny bit combinations; so, e.g., we allow: * * OPEN allow read, deny write @@ -372,6 +419,10 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { * DOWNGRADE allow read, deny none * * which we should reject. + * + * But you could also argue that our current code is already overkill, + * since it only exists to return NFS4ERR_INVAL on incorrect client + * behavior. 
*/ static unsigned int bmap_to_share_mode(unsigned long bmap) @@ -540,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) void put_nfs4_file(struct nfs4_file *fi) { - might_lock(&state_lock); - - if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { - hlist_del_rcu(&fi->fi_hash); - spin_unlock(&state_lock); + if (refcount_dec_and_test(&fi->fi_ref)) { + nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); @@ -554,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi) static struct nfsd_file * __nfs4_get_fd(struct nfs4_file *f, int oflag) { - if (f->fi_fds[oflag]) - return nfsd_file_get(f->fi_fds[oflag]); - return NULL; + return nfsd_file_get(f->fi_fds[oflag]); } static struct nfsd_file * @@ -669,20 +715,72 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -/* hash table for nfs4_file */ -#define FILE_HASH_BITS 8 -#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; + +static const struct rhashtable_params nfs4_file_rhash_params = { + .key_len = sizeof_field(struct nfs4_file, fi_inode), + .key_offset = offsetof(struct nfs4_file, fi_inode), + .head_offset = offsetof(struct nfs4_file, fi_rlist), + + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, + .automatic_shrinking = true, +}; -static unsigned int file_hashval(struct svc_fh *fh) +/* + * Check if courtesy clients have conflicting access and resolve it if possible + * + * access: is op_share_access if share_access is true. + * Check if access mode, op_share_access, would conflict with + * the current deny mode of the file 'fp'. + * access: is op_share_deny if share_access is false. + * Check if the deny mode, op_share_deny, would conflict with + * current access of the file 'fp'. 
+ * stp: skip checking this entry. + * new_stp: normal open, not open upgrade. + * + * Function returns: + * false - access/deny mode conflict with normal client. + * true - no conflict or conflict with courtesy client(s) is resolved. + */ +static bool +nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp, + struct nfs4_ol_stateid *stp, u32 access, bool share_access) { - struct inode *inode = d_inode(fh->fh_dentry); + struct nfs4_ol_stateid *st; + bool resolvable = true; + unsigned char bmap; + struct nfsd_net *nn; + struct nfs4_client *clp; - /* XXX: why not (here & in file cache) use inode? */ - return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); + lockdep_assert_held(&fp->fi_lock); + list_for_each_entry(st, &fp->fi_stateids, st_perfile) { + /* ignore lock stateid */ + if (st->st_openstp) + continue; + if (st == stp && new_stp) + continue; + /* check file access against deny mode or vice versa */ + bmap = share_access ? st->st_deny_bmap : st->st_access_bmap; + if (!(access & bmap_to_share_mode(bmap))) + continue; + clp = st->st_stid.sc_client; + if (try_to_expire_client(clp)) + continue; + resolvable = false; + break; + } + if (resolvable) { + clp = stp->st_stid.sc_client; + nn = net_generic(clp->net, nfsd_net_id); + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + } + return resolvable; } -static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; - static void __nfs4_file_get_access(struct nfs4_file *fp, u32 access) { @@ -886,23 +984,23 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla * Create a unique stateid_t to represent each COPY. 
*/ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, - unsigned char sc_type) + unsigned char cs_type) { int new_id; - stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; - stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; - stid->sc_type = sc_type; + stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; + stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); - stid->stid.si_opaque.so_id = new_id; - stid->stid.si_generation = 1; + stid->cs_stid.si_opaque.so_id = new_id; + stid->cs_stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) return 0; + stid->cs_type = cs_type; return 1; } @@ -920,7 +1018,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, if (!cps) return NULL; cps->cpntf_time = ktime_get_boottime_seconds(); - refcount_set(&cps->cp_stateid.sc_count, 1); + refcount_set(&cps->cp_stateid.cs_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; spin_lock(&nn->s2s_cp_lock); @@ -936,11 +1034,12 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; - WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID); + if (copy->cp_stateid.cs_type != NFS4_COPY_STID) + return; nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); idr_remove(&nn->s2s_cp_stateids, - copy->cp_stateid.stid.si_opaque.so_id); + copy->cp_stateid.cs_stid.si_opaque.so_id); spin_unlock(&nn->s2s_cp_lock); } @@ -972,7 +1071,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) static void nfs4_free_deleg(struct nfs4_stid *stid) { - WARN_ON(!list_empty(&stid->sc_cp_list)); + struct nfs4_delegation *dp = delegstateid(stid); + + WARN_ON_ONCE(!list_empty(&stid->sc_cp_list)); + WARN_ON_ONCE(!list_empty(&dp->dl_perfile)); + 
WARN_ON_ONCE(!list_empty(&dp->dl_perclnt)); + WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru)); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } @@ -1022,7 +1126,7 @@ static int delegation_blocked(struct knfsd_fh *fh) } spin_unlock(&blocked_delegations_lock); } - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) @@ -1041,7 +1145,7 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); @@ -1055,7 +1159,6 @@ static void block_delegations(struct knfsd_fh *fh) static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, - struct svc_fh *current_fh, struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; @@ -1065,7 +1168,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, n = atomic_long_inc_return(&num_delegations); if (n < 0 || n > max_delegations) goto out_dec; - if (delegation_blocked(¤t_fh->fh_handle)) + if (delegation_blocked(&fp->fi_fhandle)) goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) @@ -1084,6 +1187,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, get_clnt_odstate(odstate); dp->dl_type = NFS4_OPEN_DELEGATE_READ; dp->dl_retries = 1; + dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); get_nfs4_file(fp); @@ -1262,6 +1366,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + trace_nfsd_stid_revoke(&dp->dl_stid); + if (clp->cl_minorversion) { spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; 
@@ -1726,13 +1832,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, int numslots = fattrs->maxreqs; int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; - int mem, i; + int i; - BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) - + sizeof(struct nfsd4_session) > PAGE_SIZE); - mem = numslots * sizeof(struct nfsd4_slot *); + BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) + > PAGE_SIZE); - new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); if (!new) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ @@ -1977,11 +2082,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) * This type of memory management is somewhat inefficient, but we use it * anyway since SETCLIENTID is not a common operation. */ -static struct nfs4_client *alloc_client(struct xdr_netobj name) +static struct nfs4_client *alloc_client(struct xdr_netobj name, + struct nfsd_net *nn) { struct nfs4_client *clp; int i; + if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + return NULL; + } clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; @@ -1999,6 +2109,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) idr_init(&clp->cl_stateids); atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; + clp->cl_state = NFSD4_ACTIVE; + atomic_inc(&nn->nfs4_client_count); + atomic_set(&clp->cl_delegs_in_recall, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); @@ -2030,6 +2143,7 @@ static void __free_client(struct kref *k) kfree(clp->cl_nii_domain.data); kfree(clp->cl_nii_name.data); idr_destroy(&clp->cl_stateids); + kfree(clp->cl_ra); kmem_cache_free(client_slab, clp); } @@ -2105,6 +2219,7 @@ static __be32 mark_client_expired_locked(struct 
nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; @@ -2148,6 +2263,8 @@ __destroy_client(struct nfs4_client *clp) nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); + atomic_add_unless(&nn->nfs4_client_count, -1, 0); + nfsd4_dec_courtesy_client_count(nn, clp); free_client(clp); wake_up_all(&expiry_wq); } @@ -2393,7 +2510,7 @@ static const char *cb_state2str(int state) static int client_info_show(struct seq_file *m, void *v) { - struct inode *inode = m->private; + struct inode *inode = file_inode(m->file); struct nfs4_client *clp; u64 clid; @@ -2403,10 +2520,17 @@ static int client_info_show(struct seq_file *m, void *v) memcpy(&clid, &clp->cl_clientid, sizeof(clid)); seq_printf(m, "clientid: 0x%llx\n", clid); seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); - if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + + if (clp->cl_state == NFSD4_COURTESY) + seq_puts(m, "status: courtesy\n"); + else if (clp->cl_state == NFSD4_EXPIRABLE) + seq_puts(m, "status: expirable\n"); + else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) seq_puts(m, "status: confirmed\n"); else seq_puts(m, "status: unconfirmed\n"); + seq_printf(m, "seconds from last renew: %lld\n", + ktime_get_boottime_seconds() - clp->cl_time); seq_printf(m, "name: "); seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); @@ -2426,17 +2550,7 @@ static int client_info_show(struct seq_file *m, void *v) return 0; } -static int client_info_open(struct inode *inode, struct file *file) -{ - return single_open(file, client_info_show, inode); -} - -static const struct file_operations client_info_fops = { - .open = client_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; 
+DEFINE_SHOW_ATTRIBUTE(client_info); static void *states_start(struct seq_file *s, loff_t *pos) __acquires(&clp->cl_lock) @@ -2479,7 +2593,7 @@ static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { - struct inode *inode = f->nf_inode; + struct inode *inode = file_inode(f->nf_file); seq_printf(s, "superblock: \"%02x:%02x:%ld\"", MAJOR(inode->i_sb->s_dev), @@ -2757,6 +2871,34 @@ static const struct tree_descr client_files[] = { [3] = {""}, }; +static int +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, + struct rpc_task *task) +{ + trace_nfsd_cb_recall_any_done(cb, task); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; + default: + return 1; + } +} + +static void +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + drop_client(clp); +} + +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { + .done = nfsd4_cb_recall_any_done, + .release = nfsd4_cb_recall_any_release, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2767,7 +2909,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct dentry *dentries[ARRAY_SIZE(client_files)]; - clp = alloc_client(name); + clp = alloc_client(name, nn); if (clp == NULL) return NULL; @@ -2794,6 +2936,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); + if (!clp->cl_ra) { + free_client(clp); + return NULL; + } + clp->cl_ra_time = 0; + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, + NFSPROC4_CLNT_CB_RECALL_ANY); return clp; } @@ -4163,11 +4313,9 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share 
state helper functions */ -static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, - struct nfs4_file *fp) -{ - lockdep_assert_held(&state_lock); +static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) +{ refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); @@ -4185,7 +4333,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); #endif - hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); } void @@ -4249,6 +4396,54 @@ nfsd4_init_slabs(void) return -ENOMEM; } +static unsigned long +nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) +{ + int count; + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_client_shrinker); + + count = atomic_read(&nn->nfsd_courtesy_clients); + if (!count) + count = atomic_long_read(&num_delegations); + if (count) + queue_work(laundry_wq, &nn->nfsd_shrinker_work); + return (unsigned long)count; +} + +static unsigned long +nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + return SHRINK_STOP; +} + +void +nfsd4_init_leases_net(struct nfsd_net *nn) +{ + struct sysinfo si; + u64 max_clients; + + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; + nn->somebody_reclaimed = false; + nn->track_reclaim_completes = false; + nn->clverifier_counter = prandom_u32(); + nn->clientid_base = prandom_u32(); + nn->clientid_counter = nn->clientid_base + 1; + nn->s2s_cp_cl_id = nn->clientid_counter++; + + atomic_set(&nn->nfs4_client_count, 0); + si_meminfo(&si); + max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024); + max_clients *= NFS4_CLIENTS_PER_GB; + nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); + + atomic_set(&nn->nfsd_courtesy_clients, 0); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = 
nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; +} + static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; @@ -4517,71 +4712,80 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) nfs4_put_stid(&last->st_stid); } -/* search file_hashtbl[] for file */ -static struct nfs4_file * -find_file_locked(struct svc_fh *fh, unsigned int hashval) +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_lookup(const struct svc_fh *fhp) { - struct nfs4_file *fp; + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; + struct nfs4_file *fi; - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - return fp; + rcu_read_lock(); + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) { + rcu_read_unlock(); + return fi; + } } } + rcu_read_unlock(); return NULL; } -static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, - unsigned int hashval) +/* + * On hash insertion, identify entries with the same inode but + * distinct filehandles. They will all be on the list returned + * by rhltable_lookup(). + * + * inode->i_lock prevents racing insertions from adding an entry + * for the same inode/fhp pair twice. 
+ */ +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) { - struct nfs4_file *fp; + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; struct nfs4_file *ret = NULL; bool alias_found = false; + struct nfs4_file *fi; + int err; - spin_lock(&state_lock); - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - ret = fp; - } else if (d_inode(fh->fh_dentry) == fp->fi_inode) - fp->fi_aliased = alias_found = true; - } - if (likely(ret == NULL)) { - nfsd4_init_file(fh, hashval, new); - new->fi_aliased = alias_found; - ret = new; + rcu_read_lock(); + spin_lock(&inode->i_lock); + + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) + ret = fi; + } else + fi->fi_aliased = alias_found = true; } - spin_unlock(&state_lock); - return ret; -} + if (ret) + goto out_unlock; -static struct nfs4_file * find_file(struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); + nfsd4_file_init(fhp, new); + err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, + nfs4_file_rhash_params); + if (err) + goto out_unlock; - rcu_read_lock(); - fp = find_file_locked(fh, hashval); + new->fi_aliased = alias_found; + ret = new; + +out_unlock: + spin_unlock(&inode->i_lock); rcu_read_unlock(); - return fp; + return ret; } -static struct nfs4_file * -find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) +static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - if (fp) - return fp; - - return 
insert_file(new, fh, hashval); + rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, + nfs4_file_rhash_params); } /* @@ -4594,9 +4798,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = find_file(current_fh); + fp = nfsd4_file_hash_lookup(current_fh); if (!fp) return ret; + /* Check for conflicting share reservations */ spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type) @@ -4606,6 +4811,35 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) return ret; } +static bool nfsd4_deleg_present(const struct inode *inode) +{ + struct file_lock_context *ctx = locks_inode_context(inode); + + return ctx && !list_empty_careful(&ctx->flc_lease); +} + +/** + * nfsd_wait_for_delegreturn - wait for delegations to be returned + * @rqstp: the RPC transaction being executed + * @inode: in-core inode of the file being waited for + * + * The timeout prevents deadlock if all nfsd threads happen to be + * tied up waiting for returning delegations. 
+ * + * Return values: + * %true: delegation was returned + * %false: timed out waiting for delegreturn + */ +bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode) +{ + long __maybe_unused timeo; + + timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode), + NFSD_DELEGRETURN_TIMEOUT); + trace_nfsd_delegret_wakeup(rqstp, inode, timeo); + return timeo > 0; +} + static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) { struct nfs4_delegation *dp = cb_to_delegation(cb); @@ -4634,6 +4868,8 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, { struct nfs4_delegation *dp = cb_to_delegation(cb); + trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task); + if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID || dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) return 1; @@ -4679,23 +4915,31 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the - * i_lock) we know the server hasn't removed the lease yet, and + * flc_lock) we know the server hasn't removed the lease yet, and * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); } -/* Called from break_lease() with i_lock held. */ +/* Called from break_lease() with flc_lock held. 
*/ static bool nfsd_break_deleg_cb(struct file_lock *fl) { - bool ret = false; struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; + struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfsd_net *nn; trace_nfsd_cb_recall(&dp->dl_stid); + dp->dl_recalled = true; + atomic_inc(&clp->cl_delegs_in_recall); + if (try_to_expire_client(clp)) { + nn = net_generic(clp->net, nfsd_net_id); + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + } + /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -4703,11 +4947,9 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - spin_unlock(&fp->fi_lock); - return ret; + return false; } /** @@ -4738,9 +4980,14 @@ static int nfsd_change_deleg_cb(struct file_lock *onlist, int arg, struct list_head *dispose) { - if (arg & F_UNLCK) + struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner; + struct nfs4_client *clp = dp->dl_stid.sc_client; + + if (arg & F_UNLCK) { + if (dp->dl_recalled) + atomic_dec(&clp->cl_delegs_in_recall); return lease_modify(onlist, arg, dispose); - else + } else return -EAGAIN; } @@ -4937,16 +5184,19 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, .ia_valid = ATTR_SIZE, .ia_size = 0, }; + struct nfsd_attrs attrs = { + .na_iattr = &iattr, + }; if (!open->op_truncate) return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); + return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, - struct nfsd4_open *open) + struct nfsd4_open *open, bool new_stp) { struct nfsd_file *nf = NULL; __be32 status; @@ -4962,6 +5212,13 @@ static __be32 
nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, */ status = nfs4_file_check_deny(fp, open->op_share_deny); if (status != nfs_ok) { + if (status != nfserr_share_denied) { + spin_unlock(&fp->fi_lock); + goto out; + } + if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, + stp, open->op_share_deny, false)) + status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } @@ -4969,6 +5226,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, /* set access to the file */ status = nfs4_file_get_access(fp, open->op_share_access); if (status != nfs_ok) { + if (status != nfserr_share_denied) { + spin_unlock(&fp->fi_lock); + goto out; + } + if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, + stp, open->op_share_access, true)) + status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } @@ -4984,9 +5248,12 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status) + + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) goto out_put_access; + spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { fp->fi_fds[oflag] = nf; @@ -5015,21 +5282,30 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, } static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, + struct nfsd4_open *open) { __be32 status; unsigned char old_deny_bmap = stp->st_deny_bmap; if (!test_access(open->op_share_access, stp)) - return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); + return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false); /* test and set deny mode */ spin_lock(&fp->fi_lock); status = 
nfs4_file_check_deny(fp, open->op_share_deny); - if (status == nfs_ok) { + switch (status) { + case nfs_ok: set_deny(open->op_share_deny, stp); fp->fi_share_deny |= - (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + break; + case nfserr_share_denied: + if (nfs4_resolve_deny_conflicts_locked(fp, false, + stp, open->op_share_deny, false)) + status = nfserr_jukebox; + break; } spin_unlock(&fp->fi_lock); @@ -5132,11 +5408,59 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, return 0; } +/* + * It's possible that between opening the dentry and setting the delegation, + * that it has been renamed or unlinked. Redo the lookup to verify that this + * hasn't happened. + */ +static int +nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, + struct svc_fh *parent) +{ + struct svc_export *exp; + struct dentry *child; + __be32 err; + + err = nfsd_lookup_dentry(open->op_rqstp, parent, + open->op_fname, open->op_fnamelen, + &exp, &child); + + if (err) + return -EAGAIN; + + exp_put(exp); + dput(child); + if (child != file_dentry(fp->fi_deleg_file->nf_file)) + return -EAGAIN; + + return 0; +} + +/* + * We avoid breaking delegations held by a client due to its own activity, but + * clearing setuid/setgid bits on a write is an implicit activity and the client + * may not notice and continue using the old mode. Avoid giving out a delegation + * on setuid/setgid files when the client is requesting an open for write. 
+ */ +static int +nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) +{ + struct inode *inode = file_inode(nf->nf_file); + + if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) && + (inode->i_mode & (S_ISUID|S_ISGID))) + return -EAGAIN; + return 0; +} + static struct nfs4_delegation * -nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, - struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) +nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *parent) { int status = 0; + struct nfs4_client *clp = stp->st_stid.sc_client; + struct nfs4_file *fp = stp->st_stid.sc_file; + struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf; struct file_lock *fl; @@ -5162,6 +5486,8 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) status = -EAGAIN; + else if (nfsd4_verify_setuid_write(open, nf)) + status = -EAGAIN; else if (!fp->fi_deleg_file) { fp->fi_deleg_file = nf; /* increment early to prevent fi_deleg_file from being @@ -5178,7 +5504,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, return ERR_PTR(status); status = -ENOMEM; - dp = alloc_init_deleg(clp, fp, fh, odstate); + dp = alloc_init_deleg(clp, fp, odstate); if (!dp) goto out_delegees; @@ -5191,16 +5517,32 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, locks_free_lock(fl); if (status) goto out_clnt_odstate; + + if (parent) { + status = nfsd4_verify_deleg_dentry(open, fp, parent); + if (status) + goto out_unlock; + } + status = nfsd4_check_conflicting_opens(clp, fp); if (status) goto out_unlock; + /* + * Now that the deleg is set, check again to ensure that nothing + * raced in and changed the mode while we weren't lookng. 
+ */ + status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file); + if (status) + goto out_unlock; + + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); - if (fp->fi_had_conflict) - status = -EAGAIN; - else - status = hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -5246,12 +5588,13 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. */ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, - struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *currentfh) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); struct nfs4_client *clp = stp->st_stid.sc_client; + struct svc_fh *parent = NULL; int cb_up; int status = 0; @@ -5265,6 +5608,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: + parent = currentfh; + fallthrough; case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's @@ -5279,7 +5624,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate); + dp = nfs4_set_delegation(open, stp, parent); if (IS_ERR(dp)) goto out_no_deleg; @@ -5321,6 +5666,18 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, */ } +/** + * nfsd4_process_open2 - finish open processing + * @rqstp: the RPC transaction being executed + * @current_fh: NFSv4 COMPOUND's current filehandle + * @open: OPEN arguments + * + * If successful, (1) truncate the file if open->op_truncate was + * set, (2) set open->op_stateid, (3) set open->op_delegation. 
+ * + * Returns %nfs_ok on success; otherwise an nfs4stat value in + * network byte order is returned. + */ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { @@ -5337,7 +5694,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_or_add_file(open->op_file, current_fh); + fp = nfsd4_file_hash_insert(open->op_file, current_fh); + if (unlikely(!fp)) + return nfserr_jukebox; if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -5370,7 +5729,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } } else { - status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true); if (status) { stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); @@ -5399,7 +5758,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. 
*/ - nfs4_open_delegation(current_fh, open, stp); + nfs4_open_delegation(open, stp, &resp->cstate.current_fh); nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); @@ -5573,7 +5932,7 @@ static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) { bool do_wakeup = false; - struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *tmp; spin_lock(&nn->nfsd_ssc_lock); @@ -5604,10 +5963,131 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) } #endif +/* Check if any lock belonging to this lockowner has any blockers */ +static bool +nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) +{ + struct file_lock_context *ctx; + struct nfs4_ol_stateid *stp; + struct nfs4_file *nf; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + nf = stp->st_stid.sc_file; + ctx = locks_inode_context(nf->fi_inode); + if (!ctx) + continue; + if (locks_owner_has_blockers(ctx, lo)) + return true; + } + return false; +} + +static bool +nfs4_anylock_blockers(struct nfs4_client *clp) +{ + int i; + struct nfs4_stateowner *so; + struct nfs4_lockowner *lo; + + if (atomic_read(&clp->cl_delegs_in_recall)) + return true; + spin_lock(&clp->cl_lock); + for (i = 0; i < OWNER_HASH_SIZE; i++) { + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i], + so_strhash) { + if (so->so_is_open_owner) + continue; + lo = lockowner(so); + if (nfs4_lockowner_has_blockers(lo)) { + spin_unlock(&clp->cl_lock); + return true; + } + } + } + spin_unlock(&clp->cl_lock); + return false; +} + +static void +nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, + struct laundry_time *lt) +{ + unsigned int maxreap, reapcnt = 0; + struct list_head *pos, *next; + struct nfs4_client *clp; + + maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ? 
+ NFSD_CLIENT_MAX_TRIM_PER_RUN : 0; + INIT_LIST_HEAD(reaplist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state == NFSD4_EXPIRABLE) + goto exp_client; + if (!state_expired(lt, clp->cl_time)) + break; + if (!atomic_read(&clp->cl_rpc_users)) { + if (clp->cl_state == NFSD4_ACTIVE) + atomic_inc(&nn->nfsd_courtesy_clients); + clp->cl_state = NFSD4_COURTESY; + } + if (!client_has_state(clp)) + goto exp_client; + if (!nfs4_anylock_blockers(clp)) + if (reapcnt >= maxreap) + continue; +exp_client: + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; + } + } + spin_unlock(&nn->client_lock); +} + +static void +nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, + struct list_head *reaplist) +{ + unsigned int maxreap = 0, reapcnt = 0; + struct list_head *pos, *next; + struct nfs4_client *clp; + + maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; + INIT_LIST_HEAD(reaplist); + + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state == NFSD4_ACTIVE) + break; + if (reapcnt >= maxreap) + break; + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; + } + } + spin_unlock(&nn->client_lock); +} + +static void +nfs4_process_client_reaplist(struct list_head *reaplist) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + + list_for_each_safe(pos, next, reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + trace_nfsd_clid_purged(&clp->cl_clientid); + list_del_init(&clp->cl_lru); + expire_client(clp); + } +} + static time64_t nfs4_laundromat(struct nfsd_net *nn) { - struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; @@ -5626,33 +6106,18 @@ nfs4_laundromat(struct nfsd_net *nn) goto out; } nfsd4_end_grace(nn); - INIT_LIST_HEAD(&reaplist); 
spin_lock(&nn->s2s_cp_lock); idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && + if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID && state_expired(<, cps->cpntf_time)) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); + nfs4_get_client_reaplist(nn, &reaplist, <); + nfs4_process_client_reaplist(&reaplist); - spin_lock(&nn->client_lock); - list_for_each_safe(pos, next, &nn->client_lru) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - if (!state_expired(<, clp->cl_time)) - break; - if (mark_client_expired_locked(clp)) - continue; - list_add(&clp->cl_lru, &reaplist); - } - spin_unlock(&nn->client_lock); - list_for_each_safe(pos, next, &reaplist) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - trace_nfsd_clid_purged(&clp->cl_clientid); - list_del_init(&clp->cl_lru); - expire_client(clp); - } spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); @@ -5721,7 +6186,6 @@ nfs4_laundromat(struct nfsd_net *nn) return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } -static struct workqueue_struct *laundry_wq; static void laundromat_main(struct work_struct *); static void @@ -5736,6 +6200,64 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } +static void +courtesy_client_reaper(struct nfsd_net *nn) +{ + struct list_head reaplist; + + nfs4_get_courtesy_client_reaplist(nn, &reaplist); + nfs4_process_client_reaplist(&reaplist); +} + +static void +deleg_reaper(struct nfsd_net *nn) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + struct list_head cblist; + + INIT_LIST_HEAD(&cblist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state != NFSD4_ACTIVE || + 
list_empty(&clp->cl_delegations) || + atomic_read(&clp->cl_delegs_in_recall) || + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || + (ktime_get_boottime_seconds() - + clp->cl_ra_time < 5)) { + continue; + } + list_add(&clp->cl_ra_cblist, &cblist); + + /* release in nfsd4_cb_recall_any_release */ + kref_get(&clp->cl_nfsdfs.cl_ref); + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + clp->cl_ra_time = ktime_get_boottime_seconds(); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&cblist)) { + clp = list_first_entry(&cblist, struct nfs4_client, + cl_ra_cblist); + list_del_init(&clp->cl_ra_cblist); + clp->cl_ra->ra_keep = 0; + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + trace_nfsd_cb_recall_any(clp->cl_ra); + nfsd4_run_cb(&clp->cl_ra->ra_cb); + } +} + +static void +nfsd4_state_shrinker_worker(struct work_struct *work) +{ + struct nfsd_net *nn = container_of(work, struct nfsd_net, + nfsd_shrinker_work); + + courtesy_client_reaper(nn); + deleg_reaper(nn); +} + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -5869,6 +6391,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; + struct nfs4_stid *stid; bool return_revoked = false; /* @@ -5891,15 +6414,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, } if (status) return status; - *s = find_stateid_by_type(cstate->clp, stateid, typemask); - if (!*s) + stid = find_stateid_by_type(cstate->clp, stateid, typemask); + if (!stid) return nfserr_bad_stateid; - if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { - nfs4_put_stid(*s); + if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + nfs4_put_stid(stid); if (cstate->minorversion) return nfserr_deleg_revoked; return nfserr_bad_stateid; } + *s = stid; return nfs_ok; } @@ -5964,12 +6488,12 @@ nfs4_check_file(struct svc_rqst *rqstp, 
struct svc_fh *fhp, struct nfs4_stid *s, static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { - WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID); - if (!refcount_dec_and_test(&cps->cp_stateid.sc_count)) + WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID); + if (!refcount_dec_and_test(&cps->cp_stateid.cs_count)) return; list_del(&cps->cp_list); idr_remove(&nn->s2s_cp_stateids, - cps->cp_stateid.stid.si_opaque.so_id); + cps->cp_stateid.cs_stid.si_opaque.so_id); kfree(cps); } /* @@ -5991,12 +6515,12 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (cps_t) { state = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { + if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) { state = NULL; goto unlock; } if (!clp) - refcount_inc(&state->cp_stateid.sc_count); + refcount_inc(&state->cp_stateid.cs_count); else _free_cpntf_state_locked(nn, state); } @@ -6498,6 +7022,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; + trace_nfsd_deleg_return(stateid); + wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: nfs4_put_stid(&dp->dl_stid); @@ -6534,7 +7060,7 @@ nfs4_transform_lock_offset(struct file_lock *lock) } static fl_owner_t -nfsd4_fl_get_owner(fl_owner_t owner) +nfsd4_lm_get_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; @@ -6543,7 +7069,7 @@ nfsd4_fl_get_owner(fl_owner_t owner) } static void -nfsd4_fl_put_owner(fl_owner_t owner) +nfsd4_lm_put_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; @@ -6551,6 +7077,29 @@ nfsd4_fl_put_owner(fl_owner_t owner) nfs4_put_stateowner(&lo->lo_owner); } +/* return true if try_to_expire_client() succeeds for the client that owns the lock */ +static bool +nfsd4_lm_lock_expirable(struct file_lock *cfl) +{ + struct nfs4_lockowner *lo = (struct 
nfs4_lockowner *)cfl->fl_owner; + struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfsd_net *nn; + + if (try_to_expire_client(clp)) { + nn = net_generic(clp->net, nfsd_net_id); + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + return true; + } + return false; +} + +/* schedule laundromat to run immediately and wait for it to complete */ +static void +nfsd4_lm_expire_lock(void) +{ + flush_workqueue(laundry_wq); +} + static void nfsd4_lm_notify(struct file_lock *fl) { @@ -6577,9 +7126,12 @@ nfsd4_lm_notify(struct file_lock *fl) } static const struct lock_manager_operations nfsd_posix_mng_ops = { + .lm_mod_owner = THIS_MODULE, .lm_notify = nfsd4_lm_notify, - .lm_get_owner = nfsd4_fl_get_owner, - .lm_put_owner = nfsd4_fl_put_owner, + .lm_get_owner = nfsd4_lm_get_owner, + .lm_put_owner = nfsd4_lm_put_owner, + .lm_lock_expirable = nfsd4_lm_lock_expirable, + .lm_expire_lock = nfsd4_lm_expire_lock, }; static inline void @@ -6868,7 +7420,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; - struct super_block *sb; __be32 status = 0; int lkflg; int err; @@ -6890,7 +7441,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_lock: permission denied!\n"); return status; } - sb = cstate->current_fh.fh_dentry->d_sb; if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) @@ -6942,8 +7492,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: - if (nfsd4_has_session(cstate) && - !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS)) + if (nfsd4_has_session(cstate)) fl_flags |= FL_SLEEP; fallthrough; case NFS4_READ_LT: @@ -6955,8 +7504,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fl_type = F_RDLCK; break; case NFS4_WRITEW_LT: - if (nfsd4_has_session(cstate) && - 
!(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS)) + if (nfsd4_has_session(cstate)) fl_flags |= FL_SLEEP; fallthrough; case NFS4_WRITE_LT: @@ -6977,6 +7525,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } + /* + * Most filesystems with their own ->lock operations will block + * the nfsd thread waiting to acquire the lock. That leads to + * deadlocks (we don't want every nfsd thread tied up waiting + * for file locks), so don't attempt blocking lock notifications + * on those filesystems: + */ + if (nf->nf_file->f_op->lock) + fl_flags &= ~FL_SLEEP; + nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { dprintk("NFSD: %s: unable to allocate block!\n", __func__); @@ -7007,6 +7565,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); + kref_get(&nbl->nbl_kref); spin_unlock(&nn->blocked_locks_lock); } @@ -7019,6 +7578,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: + kref_put(&nbl->nbl_kref, free_nbl); nbl = NULL; fallthrough; case -EAGAIN: /* conflock holds conflicting lock */ @@ -7039,8 +7599,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* dequeue it if we queued it before */ if (fl_flags & FL_SLEEP) { spin_lock(&nn->blocked_locks_lock); - list_del_init(&nbl->nbl_list); - list_del_init(&nbl->nbl_lru); + if (!list_empty(&nbl->nbl_list) && + !list_empty(&nbl->nbl_lru)) { + list_del_init(&nbl->nbl_list); + list_del_init(&nbl->nbl_lru); + kref_put(&nbl->nbl_kref, free_nbl); + } + /* nbl can use one of lists to be linked to reaplist */ spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); @@ -7081,21 +7646,22 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static __be32 nfsd_test_lock(struct 
svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct nfsd_file *nf; + struct inode *inode; __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; - fh_lock(fhp); /* to block new leases till after test_lock: */ - err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode, - NFSD_MAY_READ)); + inode = fhp->fh_dentry->d_inode; + inode_lock(inode); /* to block new leases till after test_lock: */ + err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (err) goto out; lock->fl_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); lock->fl_file = NULL; out: - fh_unlock(fhp); + inode_unlock(inode); nfsd_file_put(nf); return err; } @@ -7257,18 +7823,20 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf = find_any_file(fp); + struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; + spin_lock(&fp->fi_lock); + nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - return status; + goto out; } inode = locks_inode(nf->nf_file); - flctx = inode->i_flctx; + flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); @@ -7280,26 +7848,39 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } - nfsd_file_put(nf); +out: + spin_unlock(&fp->fi_lock); return status; } +/** + * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations + * @rqstp: RPC transaction + * @cstate: NFSv4 COMPOUND state + * @u: RELEASE_LOCKOWNER arguments + * + * Check if there are any locks still held and if not - free the lockowner + * and any lock state that is owned. 
+ * + * Return values: + * %nfs_ok: lockowner released or not found + * %nfserr_locks_held: lockowner still in use + * %nfserr_stale_clientid: clientid no longer active + * %nfserr_expired: clientid not recognized + */ __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); clientid_t *clid = &rlockowner->rl_clientid; - struct nfs4_stateowner *sop; - struct nfs4_lockowner *lo = NULL; struct nfs4_ol_stateid *stp; - struct xdr_netobj *owner = &rlockowner->rl_owner; - unsigned int hashval = ownerstr_hashval(owner); - __be32 status; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_lockowner *lo; struct nfs4_client *clp; - LIST_HEAD (reaplist); + LIST_HEAD(reaplist); + __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); @@ -7307,30 +7888,22 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, status = set_client(clid, cstate, nn); if (status) return status; - clp = cstate->clp; - /* Find the matching lock stateowner */ - spin_lock(&clp->cl_lock); - list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], - so_strhash) { - if (sop->so_is_open_owner || !same_owner_str(sop, owner)) - continue; + spin_lock(&clp->cl_lock); + lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); + if (!lo) { + spin_unlock(&clp->cl_lock); + return nfs_ok; + } - if (atomic_read(&sop->so_count) != 1) { + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); return nfserr_locks_held; } - - lo = lockowner(sop); - nfs4_get_stateowner(sop); - break; - } - if (!lo) { - spin_unlock(&clp->cl_lock); - return status; } - unhash_lockowner_locked(lo); while 
(!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, @@ -7340,11 +7913,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); - - return status; + return nfs_ok; } static inline struct nfs4_client_reclaim * @@ -7510,10 +8083,20 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker)) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -7585,22 +8168,18 @@ nfs4_state_start(void) { int ret; - laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); - if (laundry_wq == NULL) { - ret = -ENOMEM; - goto out; - } - ret = nfsd4_create_callback_queue(); + ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); if (ret) - goto out_free_laundry; + return ret; + + ret = nfsd4_create_callback_queue(); + if (ret) { + rhltable_destroy(&nfs4_file_rhltable); + return ret; + } set_max_delegations(); return 0; - -out_free_laundry: - destroy_workqueue(laundry_wq); -out: - return ret; } void @@ -7610,6 +8189,8 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -7637,8 +8218,8 @@ 
nfs4_state_shutdown_net(struct net *net) void nfs4_state_shutdown(void) { - destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d28b75909d..5b95499a1f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -42,6 +42,8 @@ #include #include #include +#include + #include #include "idmap.h" @@ -277,21 +279,10 @@ nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf) static __be32 nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen) { - u32 i, count; - __be32 *p; - - if (xdr_stream_decode_u32(argp->xdr, &count) < 0) - return nfserr_bad_xdr; - /* request sanity */ - if (count > 1000) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, count << 2); - if (!p) - return nfserr_bad_xdr; - for (i = 0; i < bmlen; i++) - bmval[i] = (i < count) ? be32_to_cpup(p++) : 0; + ssize_t status; - return nfs_ok; + status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen); + return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok; } static __be32 @@ -481,6 +472,15 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, return nfserr_bad_xdr; } } + if (bmval[1] & FATTR4_WORD1_TIME_CREATE) { + struct timespec64 ts; + + /* No Linux filesystem supports setting this attribute. 
*/ + bmval[1] &= ~FATTR4_WORD1_TIME_CREATE; + status = nfsd4_decode_nfstime4(argp, &ts); + if (status) + return status; + } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { u32 set_it; @@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) static __be32 nfsd4_decode_access(struct nfsd4_compoundargs *argp, - struct nfsd4_access *access) + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) return nfserr_bad_xdr; return nfs_ok; } static __be32 -nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) +nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) return nfserr_bad_xdr; return nfsd4_decode_stateid4(argp, &close->cl_stateid); @@ -787,20 +789,24 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) static __be32 -nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) return nfserr_bad_xdr; + memset(&commit->co_verf, 0, sizeof(commit->co_verf)); return nfs_ok; } static __be32 -nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) +nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; __be32 *p, status; + memset(create, 0, sizeof(*create)); if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0) return nfserr_bad_xdr; switch (create->cr_type) { @@ -842,21 +848,26 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } static inline __be32 
-nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_delegreturn *dr = &u->delegreturn; return nfsd4_decode_stateid4(argp, &dr->dr_stateid); } static inline __be32 -nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; + memset(getattr, 0, sizeof(*getattr)); return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, ARRAY_SIZE(getattr->ga_bmval)); } static __be32 -nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) +nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; + memset(link, 0, sizeof(*link)); return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } @@ -903,8 +914,10 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; + memset(lock, 0, sizeof(*lock)); if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) return nfserr_bad_xdr; if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) @@ -919,8 +932,10 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; + memset(lockt, 0, sizeof(*lockt)); if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) return nfserr_bad_xdr; if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) @@ -934,8 +949,9 @@ nfsd4_decode_lockt(struct 
nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) } static __be32 -nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; __be32 status; if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) @@ -956,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) } static __be32 -nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lookup *lookup = &u->lookup; return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); } @@ -1137,16 +1154,14 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; __be32 status; u32 dummy; - memset(open->op_bmval, 0, sizeof(open->op_bmval)); - open->op_iattr.ia_valid = 0; - open->op_openowner = NULL; + memset(open, 0, sizeof(*open)); - open->op_xdr_error = 0; if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0) return nfserr_bad_xdr; /* deleg_want is ignored */ @@ -1168,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) } static __be32 -nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *open_conf = &u->open_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1181,14 +1198,19 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0) return nfserr_bad_xdr; + memset(&open_conf->oc_resp_stateid, 0, + 
sizeof(open_conf->oc_resp_stateid)); return nfs_ok; } static __be32 -nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *open_down = &u->open_downgrade; __be32 status; + memset(open_down, 0, sizeof(*open_down)); status = nfsd4_decode_stateid4(argp, &open_down->od_stateid); if (status) return status; @@ -1203,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d } static __be32 -nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_putfh *putfh = &u->putfh; __be32 *p; if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) @@ -1218,11 +1241,12 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) if (!putfh->pf_fhval) return nfserr_jukebox; + putfh->no_verify = false; return nfs_ok; } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { if (argp->minorversion == 0) return nfs_ok; @@ -1230,10 +1254,12 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) } static __be32 -nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) +nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; __be32 status; + memset(read, 0, sizeof(*read)); status = nfsd4_decode_stateid4(argp, &read->rd_stateid); if (status) return status; @@ -1246,10 +1272,12 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) } static __be32 -nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = 
&u->readdir; __be32 status; + memset(readdir, 0, sizeof(*readdir)); if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0) return nfserr_bad_xdr; status = nfsd4_decode_verifier4(argp, &readdir->rd_verf); @@ -1267,16 +1295,20 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read } static __be32 -nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; + memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } static __be32 -nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; __be32 status; + memset(rename, 0, sizeof(*rename)); status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen); if (status) return status; @@ -1284,23 +1316,28 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename } static __be32 -nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + clientid_t *clientid = &u->renew; return nfsd4_decode_clientid4(argp, clientid); } static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; + secinfo->si_exp = NULL; return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } static __be32 -nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; __be32 status; + memset(setattr, 0, sizeof(*setattr)); status = 
nfsd4_decode_stateid4(argp, &setattr->sa_stateid); if (status) return status; @@ -1311,10 +1348,13 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta } static __be32 -nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setclientid *setclientid = &u->setclientid; __be32 *p, status; + memset(setclientid, 0, sizeof(*setclientid)); + if (argp->minorversion >= 1) return nfserr_notsupp; @@ -1352,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient } static __be32 -nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1367,10 +1409,13 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s /* Also used for NVERIFY */ static __be32 -nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_verify *verify = &u->verify; __be32 *p, status; + memset(verify, 0, sizeof(*verify)); + status = nfsd4_decode_bitmap4(argp, verify->ve_bmval, ARRAY_SIZE(verify->ve_bmval)); if (status) @@ -1392,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify } static __be32 -nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) +nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; __be32 status; status = nfsd4_decode_stateid4(argp, &write->wr_stateid); @@ -1410,12 +1456,17 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) if 
(!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen)) return nfserr_bad_xdr; + write->wr_bytes_written = 0; + write->wr_how_written = 0; + memset(&write->wr_verifier, 0, sizeof(write->wr_verifier)); return nfs_ok; } static __be32 -nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; __be32 status; if (argp->minorversion >= 1) @@ -1432,18 +1483,24 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel return nfs_ok; } -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; + memset(bc, 0, sizeof(*bc)); if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) return nfserr_bad_xdr; return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); } -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; u32 use_conn_in_rdma_mode; __be32 status; + memset(bcts, 0, sizeof(*bcts)); status = nfsd4_decode_sessionid4(argp, &bcts->sessionid); if (status) return status; @@ -1581,10 +1638,12 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; __be32 status; + memset(exid, 0, sizeof(*exid)); status = nfsd4_decode_verifier4(argp, &exid->verifier); if (status) return status; @@ -1633,10 
+1692,12 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; __be32 status; + memset(sess, 0, sizeof(*sess)); status = nfsd4_decode_clientid4(argp, &sess->clientid); if (status) return status; @@ -1652,34 +1713,34 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, return status; if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0) return nfserr_bad_xdr; - status = nfsd4_decode_cb_sec(argp, &sess->cb_sec); - if (status) - return status; - - return nfs_ok; + return nfsd4_decode_cb_sec(argp, &sess->cb_sec); } static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_session *destroy_session) + union nfsd4_op_u *u) { + struct nfsd4_destroy_session *destroy_session = &u->destroy_session; return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); } static __be32 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, - struct nfsd4_free_stateid *free_stateid) + union nfsd4_op_u *u) { + struct nfsd4_free_stateid *free_stateid = &u->free_stateid; return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; __be32 status; + memset(gdev, 0, sizeof(*gdev)); status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); if (status) return status; @@ -1696,10 +1757,12 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; __be32 *p, status; + memset(lcp, 0, sizeof(*lcp)); if 
(xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0) @@ -1731,10 +1794,12 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; __be32 status; + memset(lgp, 0, sizeof(*lgp)); if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0) @@ -1758,8 +1823,10 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; + memset(lrp, 0, sizeof(*lrp)); if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0) @@ -1771,17 +1838,21 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, #endif /* CONFIG_NFSD_PNFS */ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo_no_name *sin) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) return nfserr_bad_xdr; + + sin->sin_exp = NULL; return nfs_ok; } static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; __be32 *p, status; status = nfsd4_decode_sessionid4(argp, &seq->sessionid); @@ -1795,16 +1866,20 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, seq->maxslots = be32_to_cpup(p++); seq->cachethis = be32_to_cpup(p); + seq->status_flags = 0; return nfs_ok; } static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct 
nfsd4_test_stateid *test_stateid) +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; __be32 status; u32 i; + memset(test_stateid, 0, sizeof(*test_stateid)); if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0) return nfserr_bad_xdr; @@ -1812,7 +1887,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta for (i = 0; i < test_stateid->ts_num_ids; i++) { stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); if (!stateid) - return nfserrno(-ENOMEM); /* XXX: not jukebox? */ + return nfserr_jukebox; INIT_LIST_HEAD(&stateid->ts_id_list); list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid); @@ -1824,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta } static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_clientid *dc) + union nfsd4_op_u *u) { + struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; return nfsd4_decode_clientid4(argp, &dc->clientid); } static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, - struct nfsd4_reclaim_complete *rc) + union nfsd4_op_u *u) { + struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) return nfserr_bad_xdr; return nfs_ok; @@ -1839,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, - struct nfsd4_fallocate *fallocate) + union nfsd4_op_u *u) { + struct nfsd4_fallocate *fallocate = &u->allocate; __be32 status; status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); @@ -1896,12 +1974,14 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, } static __be32 
-nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) +nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; + u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; - u32 consecutive, i, count; __be32 status; + memset(copy, 0, sizeof(*copy)); status = nfsd4_decode_stateid4(argp, ©->cp_src_stateid); if (status) return status; @@ -1917,25 +1997,28 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) /* ca_consecutive: we always do consecutive copies */ if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0) return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, ©->cp_synchronous) < 0) + if (xdr_stream_decode_bool(argp->xdr, &sync) < 0) return nfserr_bad_xdr; + nfsd4_copy_set_sync(copy, sync); if (xdr_stream_decode_u32(argp->xdr, &count) < 0) return nfserr_bad_xdr; - copy->cp_intra = false; + copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src)); + if (copy->cp_src == NULL) + return nfserr_jukebox; if (count == 0) { /* intra-server copy */ - copy->cp_intra = true; + __set_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); return nfs_ok; } /* decode all the supplied server addresses but use only the first */ - status = nfsd4_decode_nl4_server(argp, ©->cp_src); + status = nfsd4_decode_nl4_server(argp, copy->cp_src); if (status) return status; ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); if (ns_dummy == NULL) - return nfserrno(-ENOMEM); /* XXX: jukebox? 
*/ + return nfserr_jukebox; for (i = 0; i < count - 1; i++) { status = nfsd4_decode_nl4_server(argp, ns_dummy); if (status) { @@ -1950,26 +2033,39 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) static __be32 nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; __be32 status; + memset(cn, 0, sizeof(*cn)); + cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src)); + if (cn->cpn_src == NULL) + return nfserr_jukebox; + cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst)); + if (cn->cpn_dst == NULL) + return nfserr_jukebox; + status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid); if (status) return status; - return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); + return nfsd4_decode_nl4_server(argp, cn->cpn_dst); } static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; + os->count = 0; + os->status = 0; return nfsd4_decode_stateid4(argp, &os->stateid); } static __be32 -nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 status; status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); @@ -1980,12 +2076,15 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0) return nfserr_bad_xdr; + seek->seek_eof = 0; + seek->seek_pos = 0; return nfs_ok; } static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_clone *clone = &u->clone; __be32 status; status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); @@ -2110,11 +2209,13 @@ 
nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) */ static __be32 nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; __be32 status; u32 maxcount; + memset(getxattr, 0, sizeof(*getxattr)); status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name); if (status) return status; @@ -2123,17 +2224,19 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); getxattr->getxa_len = maxcount; - - return status; + return nfs_ok; } static __be32 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; u32 flags, maxcount, size; __be32 status; + memset(setxattr, 0, sizeof(*setxattr)); + if (xdr_stream_decode_u32(argp->xdr, &flags) < 0) return nfserr_bad_xdr; @@ -2168,10 +2271,13 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; u32 maxcount; + memset(listxattrs, 0, sizeof(*listxattrs)); + if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0) return nfserr_bad_xdr; @@ -2197,112 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; + memset(removexattr, 0, sizeof(*removexattr)); return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } static __be32 -nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfs_ok; } static __be32 -nfsd4_decode_notsupp(struct nfsd4_compoundargs 
*argp, void *p) +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfserr_notsupp; } -typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u); static const nfsd4_dec nfsd4_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, - [OP_SETCLIENTID_CONFIRM] = 
(nfsd4_dec)nfsd4_decode_setclientid_confirm, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, + [OP_ACCESS] = nfsd4_decode_access, + [OP_CLOSE] = nfsd4_decode_close, + [OP_COMMIT] = nfsd4_decode_commit, + [OP_CREATE] = nfsd4_decode_create, + [OP_DELEGPURGE] = nfsd4_decode_notsupp, + [OP_DELEGRETURN] = nfsd4_decode_delegreturn, + [OP_GETATTR] = nfsd4_decode_getattr, + [OP_GETFH] = nfsd4_decode_noop, + [OP_LINK] = nfsd4_decode_link, + [OP_LOCK] = nfsd4_decode_lock, + [OP_LOCKT] = nfsd4_decode_lockt, + [OP_LOCKU] = nfsd4_decode_locku, + [OP_LOOKUP] = nfsd4_decode_lookup, + [OP_LOOKUPP] = nfsd4_decode_noop, + [OP_NVERIFY] = nfsd4_decode_verify, + [OP_OPEN] = nfsd4_decode_open, + [OP_OPENATTR] = nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, + [OP_PUTFH] = nfsd4_decode_putfh, + [OP_PUTPUBFH] = nfsd4_decode_putpubfh, + [OP_PUTROOTFH] = nfsd4_decode_noop, + [OP_READ] = nfsd4_decode_read, + [OP_READDIR] = nfsd4_decode_readdir, + [OP_READLINK] = nfsd4_decode_noop, + [OP_REMOVE] = nfsd4_decode_remove, + [OP_RENAME] = nfsd4_decode_rename, + [OP_RENEW] = nfsd4_decode_renew, + [OP_RESTOREFH] = nfsd4_decode_noop, + [OP_SAVEFH] = nfsd4_decode_noop, + [OP_SECINFO] = nfsd4_decode_secinfo, + [OP_SETATTR] = nfsd4_decode_setattr, + [OP_SETCLIENTID] = nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = nfsd4_decode_verify, + [OP_WRITE] = nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, - [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, - [OP_DESTROY_SESSION] = 
(nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl, + [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_decode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp, + [OP_LAYOUTGET] = nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = nfsd4_decode_notsupp, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, - [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, - [OP_RECLAIM_COMPLETE] = 
(nfsd4_dec)nfsd4_decode_reclaim_complete, + [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_decode_sequence, + [OP_SET_SSV] = nfsd4_decode_notsupp, + [OP_TEST_STATEID] = nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete, /* new operations for NFSv4.2 */ - [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, - [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, + [OP_ALLOCATE] = nfsd4_decode_fallocate, + [OP_COPY] = nfsd4_decode_copy, + [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify, + [OP_DEALLOCATE] = nfsd4_decode_fallocate, + [OP_IO_ADVISE] = nfsd4_decode_notsupp, + [OP_LAYOUTERROR] = nfsd4_decode_notsupp, + [OP_LAYOUTSTATS] = nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status, + [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status, + [OP_READ_PLUS] = nfsd4_decode_read, + [OP_SEEK] = nfsd4_decode_seek, + [OP_WRITE_SAME] = nfsd4_decode_notsupp, + [OP_CLONE] = nfsd4_decode_clone, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, - [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, - [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, + [OP_GETXATTR] = nfsd4_decode_getxattr, + [OP_SETXATTR] = 
nfsd4_decode_setxattr, + [OP_LISTXATTRS] = nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_decode_removexattr, }; static inline bool @@ -2319,7 +2427,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -static int +static bool nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { struct nfsd4_op *op; @@ -2332,35 +2440,34 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) int i; if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0) - return 0; + return false; max_reply += XDR_UNIT; argp->tag = NULL; if (unlikely(argp->taglen)) { if (argp->taglen > NFSD4_MAX_TAGLEN) - return 0; + return false; p = xdr_inline_decode(argp->xdr, argp->taglen); if (!p) - return 0; + return false; argp->tag = svcxdr_savemem(argp, p, argp->taglen); if (!argp->tag) - return 0; + return false; max_reply += xdr_align_size(argp->taglen); } if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) - return 0; + return false; if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) - return 0; + return false; argp->opcnt = min_t(u32, argp->client_opcnt, NFSD_MAX_OPS_PER_COMPOUND); if (argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); if (!argp->ops) { argp->ops = argp->iops; - dprintk("nfsd: couldn't allocate room for COMPOUND\n"); - return 0; + return false; } } @@ -2373,7 +2480,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opdesc = NULL; if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) - return 0; + return false; if (nfsd4_opnum_in_range(argp, op)) { op->opdesc = OPDESC(op); op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); @@ -2421,7 +2528,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); - return 1; + return true; } static __be32 *encode_change(__be32 *p, struct kstat 
*stat, struct inode *inode, @@ -2435,6 +2542,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, + struct timespec64 *tv) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 3); + if (!p) + return nfserr_resource; + + p = xdr_encode_hyper(p, (s64)tv->tv_sec); + *p = cpu_to_be32(tv->tv_nsec); + return nfs_ok; +} + /* * ctime (in NFSv4, time_metadata) is not writeable, and the client * doesn't really care what resolution could theoretically be stored by @@ -2763,9 +2884,10 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 } -static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) +static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino) { struct path path = exp->ex_path; + struct kstat stat; int err; path_get(&path); @@ -2773,8 +2895,10 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); + err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); path_put(&path); + if (!err) + *pino = stat.ino; return err; } @@ -2827,10 +2951,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - __be32 *p; + __be32 *p, *attrlen_p; int starting_len = xdr->buf->len; int attrlen_offset; - __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; @@ -2862,6 +2985,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; + if (!(stat.result_mask & STATX_BTIME)) + /* underlying FS does not offer btime so we can't share it */ + bmval1 &= ~FATTR4_WORD1_TIME_CREATE; if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | 
FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | @@ -2915,10 +3041,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; attrlen_offset = xdr->buf->len; - p = xdr_reserve_space(xdr, 4); - if (!p) + attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!attrlen_p) goto out_resource; - p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 supp[3]; @@ -3104,7 +3229,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { @@ -3236,11 +3361,14 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); - *p++ = cpu_to_be32(stat.atime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.atime); + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_TIME_CREATE) { + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); @@ -3249,36 +3377,31 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); - *p++ = cpu_to_be32(stat.ctime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.ctime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); - *p++ = cpu_to_be32(stat.mtime.tv_nsec); + status = 
nfsd4_encode_nfstime4(xdr, &stat.mtime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - struct kstat parent_stat; u64 ino = stat.ino; p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; /* - * Get parent's attributes if not ignoring crossmount - * and this is the root of a cross-mounted filesystem. + * Get ino of mountpoint in parent filesystem, if not ignoring + * crossmount and this is the root of a cross-mounted + * filesystem. */ if (ignore_crossmnt == 0 && dentry == exp->ex_path.mnt->mnt_root) { - err = get_parent_attributes(exp, &parent_stat); + err = nfsd4_get_mounted_on_ino(exp, &ino); if (err) goto out_nfserr; - ino = parent_stat.ino; } p = xdr_encode_hyper(p, ino); } @@ -3333,8 +3456,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, *p++ = cpu_to_be32(err == 0); } - attrlen = htonl(xdr->buf->len - attrlen_offset - 4); - write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); + *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); status = nfs_ok; out: @@ -3503,7 +3625,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_reserve_space(xdr, 3*4 + namlen); if (!p) goto fail; - p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ + p = xdr_encode_hyper(p, OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); @@ -3587,8 +3709,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) } static __be32 -nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3600,8 +3724,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ return 0; } -static __be32 
nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3617,8 +3743,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, } static __be32 -nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); @@ -3626,8 +3754,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 -nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3640,8 +3770,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3654,8 +3786,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; struct svc_fh *fhp = getattr->ga_fhp; 
struct xdr_stream *xdr = resp->xdr; @@ -3664,8 +3798,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct svc_fh **fhpp = &u->getfh; struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; @@ -3675,7 +3811,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len); return 0; } @@ -3719,8 +3855,10 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) } static __be32 -nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) +nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; struct xdr_stream *xdr = resp->xdr; if (!nfserr) @@ -3732,8 +3870,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo } static __be32 -nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) @@ -3742,8 +3882,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l } static __be32 -nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); @@ -3751,8 
+3893,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 -nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3765,8 +3909,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 -nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3859,16 +4005,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op } static __be32 -nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *oc = &u->open_confirm; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 -nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *od = &u->open_downgrade; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); @@ -3882,16 +4032,15 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = resp->xdr; struct xdr_buf *buf = xdr->buf; int status, space_left; - u32 eof; __be32 nfserr; - __be32 *p = xdr->p - 2; /* Make sure there will be room for padding if needed */ if (xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, - file, read->rd_offset, 
&maxcount, &eof); + file, read->rd_offset, &maxcount, + &read->rd_eof); read->rd_length = maxcount; if (nfserr) goto out_err; @@ -3902,9 +4051,6 @@ static __be32 nfsd4_encode_splice_read( goto out_err; } - *(p++) = htonl(eof); - *(p++) = htonl(maxcount); - buf->page_len = maxcount; buf->len += maxcount; xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) @@ -3946,11 +4092,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct file *file, unsigned long maxcount) { struct xdr_stream *xdr = resp->xdr; - u32 eof; - int starting_len = xdr->buf->len - 8; + unsigned int starting_len = xdr->buf->len; + __be32 zero = xdr_zero; __be32 nfserr; - __be32 tmp; - int pad; read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); if (read->rd_vlen < 0) @@ -3958,31 +4102,25 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount, - &eof); + &read->rd_eof); read->rd_length = maxcount; if (nfserr) return nfserr; - if (svc_encode_result_payload(resp->rqstp, starting_len + 8, maxcount)) + if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount)) return nfserr_io; - xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); - - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); - tmp = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - - tmp = xdr_zero; - pad = (maxcount&3) ? 
4 - (maxcount&3) : 0; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, - &tmp, pad); - return 0; + xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount)); + write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; } static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; struct file *file; @@ -3995,11 +4133,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { - WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); + WARN_ON_ONCE(splice_ok); return nfserr_resource; } - if (resp->xdr->buf->page_len && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { + if (resp->xdr->buf->page_len && splice_ok) { WARN_ON_ONCE(1); return nfserr_serverfault; } @@ -4008,31 +4145,32 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); - if (file->f_op->splice_read && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) + if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); - - if (nfserr) + if (nfserr) { xdr_truncate_encode(xdr, starting_len); + return nfserr; + } - return nfserr; + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(read->rd_length); + return nfs_ok; } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { - int maxcount; - __be32 wire_count; - int zero = 0; + struct nfsd4_readlink *readlink = 
&u->readlink; + __be32 *p, *maxcount_p, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; - int status; - __be32 *p; + int maxcount, status; - p = xdr_reserve_space(xdr, 4); - if (!p) + maxcount_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!maxcount_p) return nfserr_resource; maxcount = PAGE_SIZE; @@ -4057,14 +4195,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd nfserr = nfserrno(status); goto out_err; } - - wire_count = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); - xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); - if (maxcount & 3) - write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, - &zero, 4 - (maxcount&3)); - return 0; + *maxcount_p = cpu_to_be32(maxcount); + xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount)); + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; out_err: xdr_truncate_encode(xdr, length_offset); @@ -4072,8 +4207,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd } static __be32 -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; int maxcount; int bytes_left; loff_t offset; @@ -4163,8 +4300,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4176,8 +4315,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 
-nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4259,8 +4400,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); @@ -4268,8 +4410,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo_no_name *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); @@ -4280,8 +4423,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, * regardless of the error status. 
*/ static __be32 -nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4304,8 +4449,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setclientid *scd = &u->setclientid; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4328,8 +4475,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n } static __be32 -nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4345,8 +4494,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; @@ -4423,8 +4573,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4476,8 +4627,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, 
__be32 nfserr, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4499,8 +4651,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_test_stateid *test_stateid) + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4520,8 +4673,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; @@ -4573,8 +4727,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4600,8 +4755,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4621,8 +4777,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = 
&u->layoutreturn; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4707,25 +4864,27 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) static __be32 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy *copy) + union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; __be32 *p; nfserr = nfsd42_encode_write_res(resp, &copy->cp_res, - !!copy->cp_synchronous); + nfsd4_copy_is_sync(copy)); if (nfserr) return nfserr; p = xdr_reserve_space(resp->xdr, 4 + 4); *p++ = xdr_one; /* cr_consecutive */ - *p++ = cpu_to_be32(copy->cp_synchronous); + *p = nfsd4_copy_is_sync(copy) ? xdr_one : xdr_zero; return 0; } static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4739,156 +4898,83 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof, - loff_t *pos) + struct nfsd4_read *read) { - struct xdr_stream *xdr = resp->xdr; + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); struct file *file = read->rd_nf->nf_file; - int starting_len = xdr->buf->len; - loff_t hole_pos; - __be32 nfserr; - __be32 *p, tmp; - __be64 tmp64; - - hole_pos = pos ? 
*pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); - if (hole_pos > read->rd_offset) - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); + struct xdr_stream *xdr = resp->xdr; + unsigned long maxcount; + __be32 nfserr, *p; /* Content type, offset, byte count */ p = xdr_reserve_space(xdr, 4 + 8 + 4); if (!p) - return nfserr_resource; + return nfserr_io; + if (resp->xdr->buf->page_len && splice_ok) { + WARN_ON_ONCE(splice_ok); + return nfserr_serverfault; + } - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); - if (read->rd_vlen < 0) - return nfserr_resource; + maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); + if (file->f_op->splice_read && splice_ok) + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (nfserr) return nfserr; - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); - - tmp = htonl(NFS4_CONTENT_DATA); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp64 = cpu_to_be64(read->rd_offset); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); - tmp = htonl(*maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); - - tmp = xdr_zero; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, - xdr_pad_size(*maxcount)); - return nfs_ok; -} - -static __be32 -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof) -{ - struct file *file = read->rd_nf->nf_file; - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); - loff_t f_size = i_size_read(file_inode(file)); - unsigned long count; - __be32 *p; - - if (data_pos == -ENXIO) - data_pos 
= f_size; - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); - count = data_pos - read->rd_offset; - - /* Content type, offset, byte count */ - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); - if (!p) - return nfserr_resource; - *p++ = htonl(NFS4_CONTENT_HOLE); - p = xdr_encode_hyper(p, read->rd_offset); - p = xdr_encode_hyper(p, count); + *p++ = cpu_to_be32(NFS4_CONTENT_DATA); + p = xdr_encode_hyper(p, read->rd_offset); + *p = cpu_to_be32(read->rd_length); - *eof = (read->rd_offset + count) >= f_size; - *maxcount = min_t(unsigned long, count, *maxcount); return nfs_ok; } static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { - unsigned long maxcount, count; + struct nfsd4_read *read = &u->read; + struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; - struct file *file; int starting_len = xdr->buf->len; - int last_segment = xdr->buf->len; - int segments = 0; - __be32 *p, tmp; - bool is_data; - loff_t pos; - u32 eof; + u32 segments = 0; + __be32 *p; if (nfserr) return nfserr; - file = read->rd_nf->nf_file; /* eof flag, segment count */ p = xdr_reserve_space(xdr, 4 + 4); if (!p) - return nfserr_resource; + return nfserr_io; xdr_commit_encode(xdr); - maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); - count = maxcount; - - eof = read->rd_offset >= i_size_read(file_inode(file)); - if (eof) + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); + if (read->rd_eof) goto out; - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); - is_data = pos > read->rd_offset; - - while (count > 0 && !eof) { - maxcount = count; - if (is_data) - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, - segments == 0 ? 
&pos : NULL); - else - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); - if (nfserr) - goto out; - count -= maxcount; - read->rd_offset += maxcount; - is_data = !is_data; - last_segment = xdr->buf->len; - segments++; - } - -out: - if (nfserr && segments == 0) + nfserr = nfsd4_encode_read_plus_data(resp, read); + if (nfserr) { xdr_truncate_encode(xdr, starting_len); - else { - if (nfserr) { - xdr_truncate_encode(xdr, last_segment); - nfserr = nfs_ok; - eof = 0; - } - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + return nfserr; } + segments++; + +out: + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(segments); return nfserr; } static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4916,13 +5002,15 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(1); - return nfsd42_encode_nl4_server(resp, &cn->cpn_src); + nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src); + return nfserr; } static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_seek *seek) + union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 *p; p = xdr_reserve_space(resp->xdr, 4 + 8); @@ -4933,7 +5021,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *p) { return nfserr; } @@ -4984,8 +5073,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getxattr *getxattr) + union 
nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p, err; @@ -5008,8 +5098,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5049,8 +5140,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; struct xdr_stream *xdr = resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; @@ -5160,8 +5252,9 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5173,7 +5266,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); +typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); /* * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 @@ -5181,93 +5274,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * done in the decoding phase. 
*/ static const nfsd4_enc nfsd4_enc_ops[] = { - [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, - [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, - [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, - [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, - [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, - [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, - [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, - [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, - [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, - [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, - [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, - [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, - [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_READ] = (nfsd4_enc)nfsd4_encode_read, - [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, - [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, - [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, - [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, - [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, - [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, - [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, - [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ACCESS] = nfsd4_encode_access, + [OP_CLOSE] = nfsd4_encode_close, + [OP_COMMIT] = nfsd4_encode_commit, + [OP_CREATE] = 
nfsd4_encode_create, + [OP_DELEGPURGE] = nfsd4_encode_noop, + [OP_DELEGRETURN] = nfsd4_encode_noop, + [OP_GETATTR] = nfsd4_encode_getattr, + [OP_GETFH] = nfsd4_encode_getfh, + [OP_LINK] = nfsd4_encode_link, + [OP_LOCK] = nfsd4_encode_lock, + [OP_LOCKT] = nfsd4_encode_lockt, + [OP_LOCKU] = nfsd4_encode_locku, + [OP_LOOKUP] = nfsd4_encode_noop, + [OP_LOOKUPP] = nfsd4_encode_noop, + [OP_NVERIFY] = nfsd4_encode_noop, + [OP_OPEN] = nfsd4_encode_open, + [OP_OPENATTR] = nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade, + [OP_PUTFH] = nfsd4_encode_noop, + [OP_PUTPUBFH] = nfsd4_encode_noop, + [OP_PUTROOTFH] = nfsd4_encode_noop, + [OP_READ] = nfsd4_encode_read, + [OP_READDIR] = nfsd4_encode_readdir, + [OP_READLINK] = nfsd4_encode_readlink, + [OP_REMOVE] = nfsd4_encode_remove, + [OP_RENAME] = nfsd4_encode_rename, + [OP_RENEW] = nfsd4_encode_noop, + [OP_RESTOREFH] = nfsd4_encode_noop, + [OP_SAVEFH] = nfsd4_encode_noop, + [OP_SECINFO] = nfsd4_encode_secinfo, + [OP_SETATTR] = nfsd4_encode_setattr, + [OP_SETCLIENTID] = nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop, + [OP_VERIFY] = nfsd4_encode_noop, + [OP_WRITE] = nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop, /* NFSv4.1 operations */ - [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = nfsd4_encode_noop, + [OP_FREE_STATEID] = 
nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_encode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = nfsd4_encode_noop, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_noop, + [OP_LAYOUTGET] = nfsd4_encode_noop, + [OP_LAYOUTRETURN] = nfsd4_encode_noop, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, - [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, - [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_encode_sequence, + [OP_SET_SSV] = nfsd4_encode_noop, + [OP_TEST_STATEID] = nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop, /* NFSv4.2 operations */ - [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, - [OP_DEALLOCATE] = 
(nfsd4_enc)nfsd4_encode_noop, - [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, - [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, - [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ALLOCATE] = nfsd4_encode_noop, + [OP_COPY] = nfsd4_encode_copy, + [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify, + [OP_DEALLOCATE] = nfsd4_encode_noop, + [OP_IO_ADVISE] = nfsd4_encode_noop, + [OP_LAYOUTERROR] = nfsd4_encode_noop, + [OP_LAYOUTSTATS] = nfsd4_encode_noop, + [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status, + [OP_READ_PLUS] = nfsd4_encode_read_plus, + [OP_SEEK] = nfsd4_encode_seek, + [OP_WRITE_SAME] = nfsd4_encode_noop, + [OP_CLONE] = nfsd4_encode_noop, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, - [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, - [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, + [OP_GETXATTR] = nfsd4_encode_getxattr, + [OP_SETXATTR] = nfsd4_encode_setxattr, + [OP_LISTXATTRS] = nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_encode_removexattr, }; /* @@ -5311,10 +5404,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) __be32 *p; p = xdr_reserve_space(xdr, 8); - if (!p) { - WARN_ON_ONCE(1); - return; - } + if (!p) + goto release; *p++ = cpu_to_be32(op->opnum); post_err_offset = xdr->buf->len; @@ -5329,8 +5420,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) op->status = encoder(resp, op->status, &op->u); if (op->status) trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status); - if (opdesc && opdesc->op_release) - 
opdesc->op_release(&op->u); xdr_commit_encode(xdr); /* nfsd4_check_resp_size guarantees enough room for error status */ @@ -5370,8 +5459,10 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) so->so_replay.rp_buf, len); } status: - /* Note that op->status is already in network byte order: */ - write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); + *p = op->status; +release: + if (opdesc && opdesc->op_release) + opdesc->op_release(&op->u); } /* @@ -5402,7 +5493,7 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) struct nfsd4_compoundargs *args = rqstp->rq_argp; if (args->ops != args->iops) { - kfree(args->ops); + vfree(args->ops); args->ops = args->iops; } while (args->to_free) { @@ -5412,40 +5503,42 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) } } -int -nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundargs *args = rqstp->rq_argp; /* svcxdr_tmp_alloc */ args->to_free = NULL; - args->xdr = &rqstp->rq_arg_stream; + args->xdr = xdr; args->ops = args->iops; args->rqstp = rqstp; return nfsd4_decode_compound(args); } -int -nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_buf *buf = resp->xdr->buf; + __be32 *p; - WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + - buf->tail[0].iov_len); + /* + * Send buffer space for the following items is reserved + * at the top of nfsd4_proc_compound(). 
+ */ + p = resp->statusp; - *p = resp->cstate.status; + *p++ = resp->cstate.status; - rqstp->rq_next_page = resp->xdr->page_ptr + 1; + rqstp->rq_next_page = xdr->page_ptr + 1; - p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); nfsd4_sequence_done(resp); - return 1; + return true; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 830bb8493c..2b5417e06d 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -84,12 +84,6 @@ nfsd_hashsize(unsigned int limit) return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } -static u32 -nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) -{ - return hash_32(be32_to_cpu(xid), nn->maskbits); -} - static struct svc_cacherep * nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, struct nfsd_net *nn) @@ -241,8 +235,16 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) list_move_tail(&rp->c_lru, &b->lru_head); } -static long -prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +static noinline struct nfsd_drc_bucket * +nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn) +{ + unsigned int hash = hash_32((__force u32)xid, nn->maskbits); + + return &nn->drc_hashtbl[hash]; +} + +static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, + unsigned int max) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -258,11 +260,17 @@ prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(b, rp, nn); - freed++; + if (max && freed++ > max) + break; } return freed; } +static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +{ + return prune_bucket(b, nn, 3); +} + /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. 
@@ -279,7 +287,7 @@ prune_cache_entries(struct nfsd_net *nn) if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b, nn); + freed += prune_bucket(b, nn, 0); spin_unlock(&b->cache_lock); } return freed; @@ -413,12 +421,10 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, */ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd_net *nn; struct svc_cacherep *rp, *found; - __be32 xid = rqstp->rq_xid; __wsum csum; - u32 hash = nfsd_cache_hash(xid, nn); - struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; + struct nfsd_drc_bucket *b; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -434,17 +440,16 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. */ + nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rp = nfsd_reply_cache_alloc(rqstp, csum, nn); if (!rp) goto out; + b = nfsd_cache_bucket_find(rqstp->rq_xid, nn); spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp, nn); - if (found != rp) { - nfsd_reply_cache_free_locked(NULL, rp, nn); - rp = found; + if (found != rp) goto found_entry; - } nfsd_stats_rc_misses_inc(); rqstp->rq_cacherep = rp; @@ -453,8 +458,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) atomic_inc(&nn->num_drc_entries); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); - /* go ahead and prune the cache */ - prune_bucket(b, nn); + nfsd_prune_bucket(b, nn); out_unlock: spin_unlock(&b->cache_lock); @@ -463,8 +467,10 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) found_entry: /* We found a matching entry which is either in progress or done. 
*/ + nfsd_reply_cache_free_locked(NULL, rp, nn); nfsd_stats_rc_hits_inc(); rtn = RC_DROPIT; + rp = found; /* Request being processed */ if (rp->c_state == RC_INPROG) @@ -523,7 +529,6 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; - u32 hash; struct nfsd_drc_bucket *b; int len; size_t bufsize = 0; @@ -531,8 +536,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - hash = nfsd_cache_hash(rp->c_key.k_xid, nn); - b = &nn->drc_hashtbl[hash]; + b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn); len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; @@ -599,9 +603,10 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) * scraping this file for info should test the labels to ensure they're * getting the correct field. */ -static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - struct nfsd_net *nn = m->private; + struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info, + nfsd_net_id); seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", @@ -621,11 +626,3 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; } - -int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) -{ - struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, - nfsd_net_id); - - return single_open(file, nfsd_reply_cache_stats_show, nn); -} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index cb73c12925..f77f00c931 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,6 +25,7 @@ #include "state.h" #include "netns.h" #include "pnfs.h" +#include "filecache.h" /* * We have a 
single directory with several nodes in it. @@ -45,6 +46,7 @@ enum { NFSD_Ports, NFSD_MaxBlkSize, NFSD_MaxConnections, + NFSD_Filecache, NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] @@ -183,17 +185,7 @@ static int export_features_show(struct seq_file *m, void *v) return 0; } -static int export_features_open(struct inode *inode, struct file *file) -{ - return single_open(file, export_features_show, NULL); -} - -static const struct file_operations export_features_operations = { - .open = export_features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(export_features); #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) @@ -202,17 +194,7 @@ static int supported_enctypes_show(struct seq_file *m, void *v) return 0; } -static int supported_enctypes_open(struct inode *inode, struct file *file) -{ - return single_open(file, supported_enctypes_show, NULL); -} - -static const struct file_operations supported_enctypes_ops = { - .open = supported_enctypes_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(supported_enctypes); #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ static const struct file_operations pool_stats_operations = { @@ -222,12 +204,9 @@ static const struct file_operations pool_stats_operations = { .release = nfsd_pool_stats_release, }; -static const struct file_operations reply_cache_stats_operations = { - .open = nfsd_reply_cache_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats); + +DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats); /*----------------------------------------------------------------------------*/ /* @@ -395,12 +374,12 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) auth_domain_put(dom); 
if (len) return len; - + mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; - qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); + qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size); mesg[-1] = '\n'; - return mesg - buf; + return mesg - buf; } /* @@ -602,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { +#ifdef CONFIG_NFSD_V2 case 2: +#endif case 3: nfsd_vers(nn, num, cmd); break; @@ -622,7 +603,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } break; default: - return -EINVAL; + /* Ignore requests to disable non-existent versions */ + if (cmd == NFSD_SET) + return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); @@ -633,7 +616,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } /* Now write current state into reply buffer */ - len = 0; sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { @@ -727,28 +709,25 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred char *mesg = buf; int fd, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; - if (svc_alien_sock(net, fd)) { - printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); - return -EINVAL; - } - err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); - if (err < 0) { - nfsd_destroy(net); - return err; - } + serv = nn->nfsd_serv; + err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); - /* Decrease the count, but don't shut down the service */ - nn->nfsd_serv->sv_nrthreads--; + if (err < 0 && !serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); + else if (err >= 0 && !serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + svc_get(serv); + + svc_put(serv); 
return err; } @@ -762,6 +741,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr struct svc_xprt *xprt; int port, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; if (sscanf(buf, "%15s %5u", transport, &port) != 2) return -EINVAL; @@ -773,30 +753,33 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr if (err != 0) return err; - err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET, port, SVC_SOCK_ANONYMOUS, cred); + serv = nn->nfsd_serv; + err = svc_xprt_create(serv, transport, net, + PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; - err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); + err = svc_xprt_create(serv, transport, net, + PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; - /* Decrease the count, but don't shut down the service */ - nn->nfsd_serv->sv_nrthreads--; + if (!serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + svc_get(serv); + + svc_put(serv); return 0; out_close: - xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); + xprt = svc_find_xprt(serv, transport, net, PF_INET, port); if (xprt != NULL) { - svc_close_xprt(xprt); + svc_xprt_close(xprt); svc_xprt_put(xprt); } out_err: - if (!list_empty(&nn->nfsd_serv->sv_permsocks)) - nn->nfsd_serv->sv_nrthreads--; - else - nfsd_destroy(net); + if (!serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); + + svc_put(serv); return err; } @@ -1359,7 +1342,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) /* Per-export io stats use same ops as exports file */ [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", - &export_features_operations, S_IRUGO}, + &export_features_fops, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, 
[NFSD_FO_UnlockFS] = {"unlock_filesystem", @@ -1368,13 +1351,16 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, - [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", + &nfsd_reply_cache_stats_fops, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) - [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, + [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", + &supported_enctypes_fops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, @@ -1474,25 +1460,16 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) - goto out_drc_error; - nn->nfsd4_lease = 90; /* default lease time */ - nn->nfsd4_grace = 90; - nn->somebody_reclaimed = false; - nn->track_reclaim_completes = false; - nn->clverifier_counter = prandom_u32(); - nn->clientid_base = prandom_u32(); - nn->clientid_counter = nn->clientid_base + 1; - nn->s2s_cp_cl_id = nn->clientid_counter++; - - atomic_set(&nn->ntf_refcnt, 0); - init_waitqueue_head(&nn->ntf_wq); - seqlock_init(&nn->boot_lock); + goto 
out_cache_error; + get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); + seqlock_init(&nn->writeverf_lock); return 0; -out_drc_error: +out_cache_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1520,7 +1497,6 @@ static struct pernet_operations nfsd_net_ops = { static int __init init_nfsd(void) { int retval; - printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); retval = nfsd4_init_slabs(); if (retval) @@ -1538,20 +1514,25 @@ static int __init init_nfsd(void) retval = create_proc_exports_entry(); if (retval) goto out_free_lockd; - retval = register_filesystem(&nfsd_fs_type); - if (retval) - goto out_free_exports; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_filesystem; + goto out_free_exports; retval = register_cld_notifier(); + if (retval) + goto out_free_subsys; + retval = nfsd4_create_laundry_wq(); + if (retval) + goto out_free_cld; + retval = register_filesystem(&nfsd_fs_type); if (retval) goto out_free_all; return 0; out_free_all: + nfsd4_destroy_laundry_wq(); +out_free_cld: + unregister_cld_notifier(); +out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); -out_free_filesystem: - unregister_filesystem(&nfsd_fs_type); out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); @@ -1569,6 +1550,8 @@ static int __init init_nfsd(void) static void __exit exit_nfsd(void) { + unregister_filesystem(&nfsd_fs_type); + nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); @@ -1578,7 +1561,6 @@ static void __exit exit_nfsd(void) nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); - unregister_filesystem(&nfsd_fs_type); } MODULE_AUTHOR("Olaf Kirch "); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 9664303afd..013bfa24ce 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -64,8 +64,7 @@ struct readdir_cd { extern struct svc_program nfsd_program; -extern 
const struct svc_version nfsd_version2, nfsd_version3, - nfsd_version4; +extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; @@ -78,8 +77,10 @@ extern const struct seq_operations nfs_exports_op; */ struct nfsd_voidargs { }; struct nfsd_voidres { }; -int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p); -int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p); +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, + struct xdr_stream *xdr); +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* * Function prototypes. @@ -95,8 +96,6 @@ int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_shutdown_threads(struct net *net); -void nfsd_destroy(struct net *net); - bool i_am_nfsd(void); struct nfsdfs_client { @@ -132,6 +131,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change); int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); +void nfsd_last_thread(struct net *net); extern int nfsd_max_blksize; @@ -160,6 +160,9 @@ void nfs4_state_shutdown_net(struct net *net); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); bool nfsd4_spo_must_allow(struct svc_rqst *rqstp); +int nfsd4_create_laundry_wq(void); +void nfsd4_destroy_laundry_wq(void); +bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode); #else static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } @@ -173,6 +176,13 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) { return false; } +static inline int nfsd4_create_laundry_wq(void) { return 0; }; +static inline void nfsd4_destroy_laundry_wq(void) {}; +static inline bool 
nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, + struct inode *inode) +{ + return false; +} #endif /* @@ -334,6 +344,10 @@ void nfsd_lockd_shutdown(void); #define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ +#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ +#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 +#define NFS4_CLIENTS_PER_GB 1024 +#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */ /* * The following attributes are currently not supported by the NFSv4 server: @@ -362,7 +376,7 @@ void nfsd_lockd_shutdown(void); | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ - | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ + | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_CREATE \ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) #define NFSD4_SUPPORTED_ATTRS_WORD2 0 @@ -458,7 +472,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \ + | FATTR4_WORD1_TIME_MODIFY_SET) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #define MAYBE_FATTR4_WORD2_SECURITY_LABEL \ FATTR4_WORD2_SECURITY_LABEL @@ -488,12 +503,16 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif +extern void nfsd4_init_leases_net(struct nfsd_net *nn); + #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) { return 0; } +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; + #define 
register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c475d2271f..8c52b6c9d3 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -154,11 +154,12 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; - struct fid *fid = NULL, sfid; + struct fid *fid = NULL; struct svc_export *exp; struct dentry *dentry; int fileid_type; int data_left = fh->fh_size/4; + int len; __be32 error; error = nfserr_stale; @@ -167,48 +168,35 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers == 4 && fh->fh_size == 0) return nfserr_nofilehandle; - if (fh->fh_version == 1) { - int len; - - if (--data_left < 0) - return error; - if (fh->fh_auth_type != 0) - return error; - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) - return error; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - /* - * struct knfsd_fh uses host-endian fields, which are - * sometimes used to hold net-endian values. This - * confuses sparse, so we must use __force here to - * keep it from complaining. 
- */ - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), - ntohl((__force __be32)fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; - } - data_left -= len; - if (data_left < 0) - return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); - fid = (struct fid *)(fh->fh_fsid + len); - } else { - __u32 tfh[2]; - dev_t xdev; - ino_t xino; - - if (fh->fh_size != NFS_FHSIZE) - return error; - /* assume old filehandle format */ - xdev = old_decode_dev(fh->ofh_xdev); - xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + if (fh->fh_version != 1) + return error; + + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. 
+ */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); error = nfserr_stale; if (IS_ERR(exp)) { @@ -253,18 +241,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers > 2) error = nfserr_badhandle; - if (fh->fh_version != 1) { - sfid.i32.ino = fh->ofh_ino; - sfid.i32.gen = fh->ofh_generation; - sfid.i32.parent_ino = fh->ofh_dirino; - fid = &sfid; - data_left = 3; - if (fh->ofh_dirino == 0) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - } else - fileid_type = fh->fh_fileid_type; + fileid_type = fh->fh_fileid_type; if (fileid_type == FILEID_ROOT) dentry = dget(exp->ex_path.dentry); @@ -354,8 +331,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) struct dentry *dentry; __be32 error; - dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); - if (!fhp->fh_dentry) { error = nfsd_set_fh_dentry(rqstp, fhp); if (error) @@ -363,6 +338,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) } dentry = fhp->fh_dentry; exp = fhp->fh_export; + + trace_nfsd_fh_verify(rqstp, fhp, type, access); + /* * We still have to do all these permission checks, even when * fh_dentry is already set: @@ -414,14 +392,8 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) skip_pseudoflavor_check: /* Finally, check access permissions. 
*/ error = nfsd_permission(rqstp, exp, dentry, access); - - if (error) { - dprintk("fh_verify: %pd2 permission failure, " - "acc=%x, error=%d\n", - dentry, - access, ntohl(error)); - } out: + trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); if (error == nfserr_stale) nfsd_stats_fh_stale_inc(exp); return error; @@ -452,20 +424,6 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, } } -/* - * for composing old style file handles - */ -static inline void _fh_update_old(struct dentry *dentry, - struct svc_export *exp, - struct knfsd_fh *fh) -{ - fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino); - fh->ofh_generation = d_inode(dentry)->i_generation; - if (d_is_dir(dentry) || - (exp->ex_flags & NFSEXP_NOSUBTREECHECK)) - fh->ofh_dirino = 0; -} - static bool is_root_export(struct svc_export *exp) { return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; @@ -562,9 +520,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, /* ref_fh is a reference file handle. * if it is non-null and for the same filesystem, then we should compose * a filehandle which is of the same version, where possible. 
- * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca - * Then create a 32byte filehandle using nfs_fhbase_old - * */ struct inode * inode = d_inode(dentry); @@ -588,7 +543,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (ref_fh == fhp) fh_put(ref_fh); - if (fhp->fh_locked || fhp->fh_dentry) { + if (fhp->fh_dentry) { printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", dentry); } @@ -600,35 +555,21 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp_get(exp); - if (fhp->fh_handle.fh_version == 0xca) { - /* old style filehandle please */ - memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); - fhp->fh_handle.fh_size = NFS_FHSIZE; - fhp->fh_handle.ofh_dcookie = 0xfeebbaca; - fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); - fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; - fhp->fh_handle.ofh_xino = - ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino); - fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); - if (inode) - _fh_update_old(dentry, exp, &fhp->fh_handle); - } else { - fhp->fh_handle.fh_size = - key_len(fhp->fh_handle.fh_fsid_type) + 4; - fhp->fh_handle.fh_auth_type = 0; - - mk_fsid(fhp->fh_handle.fh_fsid_type, - fhp->fh_handle.fh_fsid, - ex_dev, - d_inode(exp->ex_path.dentry)->i_ino, - exp->ex_fsid, exp->ex_uuid); - - if (inode) - _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { - fh_put(fhp); - return nfserr_opnotsupp; - } + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; + fhp->fh_handle.fh_auth_type = 0; + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, + d_inode(exp->ex_path.dentry)->i_ino, + exp->ex_fsid, exp->ex_uuid); + + if (inode) + _fh_update(fhp, exp, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { + fh_put(fhp); + return nfserr_opnotsupp; } return 0; @@ -649,16 +590,12 @@ 
fh_update(struct svc_fh *fhp) dentry = fhp->fh_dentry; if (d_really_is_negative(dentry)) goto out_negative; - if (fhp->fh_handle.fh_version != 1) { - _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); - } else { - if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - return 0; + if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) + return 0; - _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; - } + _fh_update(fhp, fhp->fh_export, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) + return nfserr_opnotsupp; return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); @@ -669,6 +606,85 @@ fh_update(struct svc_fh *fhp) return nfserr_serverfault; } +/** + * fh_fill_pre_attrs - Fill in pre-op attributes + * @fhp: file handle to be updated + * + */ +void fh_fill_pre_attrs(struct svc_fh *fhp) +{ + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode; + struct kstat stat; + __be32 err; + + if (fhp->fh_no_wcc || fhp->fh_pre_saved) + return; + + inode = d_inode(fhp->fh_dentry); + err = fh_getattr(fhp, &stat); + if (err) { + /* Grab the times from inode anyway */ + stat.mtime = inode->i_mtime; + stat.ctime = inode->i_ctime; + stat.size = inode->i_size; + } + if (v4) + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; + fhp->fh_pre_size = stat.size; + fhp->fh_pre_saved = true; +} + +/** + * fh_fill_post_attrs - Fill in post-op attributes + * @fhp: file handle to be updated + * + */ +void fh_fill_post_attrs(struct svc_fh *fhp) +{ + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode = d_inode(fhp->fh_dentry); + __be32 err; + + if (fhp->fh_no_wcc) + return; + + if (fhp->fh_post_saved) + printk("nfsd: inode locked twice during operation.\n"); + + err = fh_getattr(fhp, &fhp->fh_post_attr); + if (err) { + fhp->fh_post_saved = false; + fhp->fh_post_attr.ctime = inode->i_ctime; + } 
else + fhp->fh_post_saved = true; + if (v4) + fhp->fh_post_change = + nfsd4_change_attribute(&fhp->fh_post_attr, inode); +} + +/** + * fh_fill_both_attrs - Fill pre-op and post-op attributes + * @fhp: file handle to be updated + * + * This is used when the directory wasn't changed, but wcc attributes + * are needed anyway. + */ +void fh_fill_both_attrs(struct svc_fh *fhp) +{ + fh_fill_post_attrs(fhp); + if (!fhp->fh_post_saved) + return; + fhp->fh_pre_change = fhp->fh_post_change; + fhp->fh_pre_mtime = fhp->fh_post_attr.mtime; + fhp->fh_pre_ctime = fhp->fh_post_attr.ctime; + fhp->fh_pre_size = fhp->fh_post_attr.size; + fhp->fh_pre_saved = true; +} + /* * Release a file handle. */ @@ -678,10 +694,9 @@ fh_put(struct svc_fh *fhp) struct dentry * dentry = fhp->fh_dentry; struct svc_export * exp = fhp->fh_export; if (dentry) { - fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); - fh_clear_wcc(fhp); + fh_clear_pre_post_attrs(fhp); } fh_drop_write(fhp); if (exp) { @@ -698,16 +713,11 @@ fh_put(struct svc_fh *fhp) char * SVCFH_fmt(struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; + static char buf[2+1+1+64*3+1]; - static char buf[80]; - sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x", - fh->fh_size, - fh->fh_base.fh_pad[0], - fh->fh_base.fh_pad[1], - fh->fh_base.fh_pad[2], - fh->fh_base.fh_pad[3], - fh->fh_base.fh_pad[4], - fh->fh_base.fh_pad[5]); + if (fh->fh_size < 0 || fh->fh_size> 64) + return "bad-fh"; + sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw); return buf; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 6106697adc..513e028b0b 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -10,9 +10,56 @@ #include #include -#include #include #include +#include + +/* + * The file handle starts with a sequence of four-byte words. + * The first word contains a version number (1) and three descriptor bytes + * that tell how the remaining 3 variable length fields should be handled. 
+ * These three bytes are auth_type, fsid_type and fileid_type. + * + * All four-byte values are in host-byte-order. + * + * The auth_type field is deprecated and must be set to 0. + * + * The fsid_type identifies how the filesystem (or export point) is + * encoded. + * Current values: + * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number + * NOTE: we cannot use the kdev_t device id value, because kdev_t.h + * says we mustn't. We must break it up and reassemble. + * 1 - 4 byte user specified identifier + * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED + * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid + * + * The fileid_type identifies how the file within the filesystem is encoded. + * The values for this field are filesystem specific, exccept that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. + */ + +struct knfsd_fh { + unsigned int fh_size; /* + * Points to the current size while + * building a new file handle. 
+ */ + union { + char fh_raw[NFS4_FHSIZE]; + struct { + u8 fh_version; /* == 1 */ + u8 fh_auth_type; /* deprecated */ + u8 fh_fsid_type; + u8 fh_fileid_type; + u32 fh_fsid[]; /* flexible-array member */ + }; + }; +}; static inline __u32 ino_t_to_u32(ino_t ino) { @@ -34,7 +81,6 @@ typedef struct svc_fh { struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ bool fh_no_wcc; /* no wcc data needed */ bool fh_no_atomic_attr; @@ -43,11 +89,10 @@ typedef struct svc_fh { * operation */ int fh_flags; /* FH flags */ -#ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ - /* Pre-op attributes saved during fh_lock */ + /* Pre-op attributes saved when inode is locked */ __u64 fh_pre_size; /* size before operation */ struct timespec64 fh_pre_mtime; /* mtime before oper */ struct timespec64 fh_pre_ctime; /* ctime before oper */ @@ -57,10 +102,9 @@ typedef struct svc_fh { */ u64 fh_pre_change; - /* Post-op attributes saved in fh_unlock */ + /* Post-op attributes saved in fh_fill_post_attrs() */ struct kstat fh_post_attr; /* full attrs after operation */ u64 fh_post_change; /* nfsv4 change; see above */ -#endif /* CONFIG_NFSD_V3 */ } svc_fh; #define NFSD4_FH_FOREIGN (1<<0) #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) @@ -176,19 +220,19 @@ __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); static __inline__ struct svc_fh * -fh_copy(struct svc_fh *dst, struct svc_fh *src) +fh_copy(struct svc_fh *dst, const struct svc_fh *src) { - WARN_ON(src->fh_dentry || src->fh_locked); - + WARN_ON(src->fh_dentry); + *dst = *src; return dst; } static inline void -fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src) { dst->fh_size = src->fh_size; - memcpy(&dst->fh_base, &src->fh_base, src->fh_size); + 
memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); } static __inline__ struct svc_fh * @@ -199,16 +243,18 @@ fh_init(struct svc_fh *fhp, int maxsize) return fhp; } -static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; - if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0) + if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0) return false; return true; } -static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_fsid_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_fsid_type != fh2->fh_fsid_type) return false; @@ -227,7 +273,7 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) */ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { - return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size); + return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } #else static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) @@ -236,13 +282,12 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) } #endif -#ifdef CONFIG_NFSD_V3 -/* - * The wcc data stored in current_fh should be cleared - * between compound ops. 
+/** + * fh_clear_pre_post_attrs - Reset pre/post attributes + * @fhp: file handle to be updated + * */ -static inline void -fh_clear_wcc(struct svc_fh *fhp) +static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) { fhp->fh_post_saved = false; fhp->fh_pre_saved = false; @@ -276,59 +321,7 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, return time_to_chattr(&stat->ctime); } -extern void fill_pre_wcc(struct svc_fh *fhp); -extern void fill_post_wcc(struct svc_fh *fhp); -#else -#define fh_clear_wcc(ignored) -#define fill_pre_wcc(ignored) -#define fill_post_wcc(notused) -#endif /* CONFIG_NFSD_V3 */ - - -/* - * Lock a file handle/inode - * NOTE: both fh_lock and fh_unlock are done "by hand" in - * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once - * so, any changes here should be reflected there. - */ - -static inline void -fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) -{ - struct dentry *dentry = fhp->fh_dentry; - struct inode *inode; - - BUG_ON(!dentry); - - if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", - dentry); - return; - } - - inode = d_inode(dentry); - inode_lock_nested(inode, subclass); - fill_pre_wcc(fhp); - fhp->fh_locked = true; -} - -static inline void -fh_lock(struct svc_fh *fhp) -{ - fh_lock_nested(fhp, I_MUTEX_NORMAL); -} - -/* - * Unlock a file handle/inode - */ -static inline void -fh_unlock(struct svc_fh *fhp) -{ - if (fhp->fh_locked) { - fill_post_wcc(fhp); - inode_unlock(d_inode(fhp->fh_dentry)); - fhp->fh_locked = false; - } -} - +extern void fh_fill_pre_attrs(struct svc_fh *fhp); +extern void fh_fill_post_attrs(struct svc_fh *fhp); +extern void fh_fill_both_attrs(struct svc_fh *fhp); #endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index b009da1dcb..9744443c39 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -51,6 +51,9 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) struct nfsd_sattrargs *argp = rqstp->rq_argp; struct 
nfsd_attrstat *resp = rqstp->rq_resp; struct iattr *iap = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; struct svc_fh *fhp; dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", @@ -100,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - resp->status = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0); + resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); if (resp->status != nfs_ok) goto out; @@ -208,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -243,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -261,6 +264,9 @@ nfsd_proc_create(struct svc_rqst *rqstp) svc_fh *dirfhp = &argp->fh; svc_fh *newfhp = &resp->fh; struct iattr *attr = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = attr, + }; struct inode *inode; struct dentry *dchild; int type, mode; @@ -286,7 +292,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) goto done; } - fh_lock_nested(dirfhp, I_MUTEX_PARENT); + inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { resp->status = nfserrno(PTR_ERR(dchild)); @@ -385,9 +391,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) resp->status = nfs_ok; if (!inode) { /* File doesn't exist. 
Create it and set attrs */ - resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name, - argp->len, attr, type, rdev, - newfhp); + resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type, + rdev, newfhp); } else if (type == S_IFREG) { dprintk("nfsd: existing %s, valid=%x, size=%ld\n", argp->name, attr->ia_valid, (long) attr->ia_size); @@ -397,13 +402,12 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - resp->status = nfsd_setattr(rqstp, newfhp, attr, 0, + resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, (time64_t)0); } out_unlock: - /* We don't really need to unlock, as fh_put does it. */ - fh_unlock(dirfhp); + inode_unlock(dirfhp->fh_dentry->d_inode); fh_drop_write(dirfhp); done: fh_put(dirfhp); @@ -473,6 +477,9 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) { struct nfsd_symlinkargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; struct svc_fh newfh; if (argp->tlen > NFS_MAXPATHLEN) { @@ -494,7 +501,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) fh_init(&newfh, NFS_FHSIZE); resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, &newfh); + argp->tname, &attrs, &newfh); kfree(argp->tname); fh_put(&argp->ffh); @@ -512,6 +519,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd_createargs *argp = rqstp->rq_argp; struct nfsd_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); @@ -523,7 +533,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) argp->attrs.ia_valid &= ~ATTR_SIZE; fh_init(&resp->fh, NFS_FHSIZE); resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); + &attrs, S_IFDIR, 0, &resp->fh); fh_put(&argp->fh); if (resp->status != nfs_ok) goto out; @@ -565,15 +575,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst 
*rqstp, buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; - /* This is xdr_init_encode(), but it assumes that - * the head kvec has already been consumed. */ - xdr_set_scratch_buffer(xdr, NULL, 0); - xdr->buf = buf; - xdr->page_ptr = buf->pages; - xdr->iov = NULL; - xdr->p = page_address(*buf->pages); - xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = NULL; + xdr_init_encode_pages(xdr, buf, buf->pages, NULL); } /* @@ -635,6 +637,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, @@ -646,6 +649,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -657,6 +661,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_sattrargs), + .pc_argzero = sizeof(struct nfsd_sattrargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, @@ -667,6 +672,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, @@ -678,6 +684,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct 
nfsd_diropargs), + .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+AT, @@ -688,6 +695,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_fhandleargs, .pc_encode = nfssvc_encode_readlinkres, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4, @@ -699,6 +707,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_readres, .pc_release = nfssvc_release_readres, .pc_argsize = sizeof(struct nfsd_readargs), + .pc_argzero = sizeof(struct nfsd_readargs), .pc_ressize = sizeof(struct nfsd_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, @@ -709,6 +718,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, @@ -720,6 +730,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_writeargs), + .pc_argzero = sizeof(struct nfsd_writeargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, @@ -731,6 +742,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), + .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, @@ -741,6 +753,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { 
.pc_decode = nfssvc_decode_diropargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -751,6 +764,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_renameargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_renameargs), + .pc_argzero = sizeof(struct nfsd_renameargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -761,6 +775,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_linkargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_linkargs), + .pc_argzero = sizeof(struct nfsd_linkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -771,6 +786,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_symlinkargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_symlinkargs), + .pc_argzero = sizeof(struct nfsd_symlinkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -782,6 +798,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), + .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, @@ -792,6 +809,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_diropargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -802,6 +820,7 @@ static const struct 
svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_readdirargs, .pc_encode = nfssvc_encode_readdirres, .pc_argsize = sizeof(struct nfsd_readdirargs), + .pc_argzero = sizeof(struct nfsd_readdirargs), .pc_ressize = sizeof(struct nfsd_readdirres), .pc_cachetype = RC_NOCACHE, .pc_name = "READDIR", @@ -811,6 +830,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_fhandleargs, .pc_encode = nfssvc_encode_statfsres, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_statfsres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+5, @@ -828,62 +848,3 @@ const struct svc_version nfsd_version2 = { .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS2_SVC_XDRSIZE, }; - -/* - * Map errnos to NFS errnos. - */ -__be32 -nfserrno (int errno) -{ - static struct { - __be32 nfserr; - int syserr; - } nfs_errtbl[] = { - { nfs_ok, 0 }, - { nfserr_perm, -EPERM }, - { nfserr_noent, -ENOENT }, - { nfserr_io, -EIO }, - { nfserr_nxio, -ENXIO }, - { nfserr_fbig, -E2BIG }, - { nfserr_acces, -EACCES }, - { nfserr_exist, -EEXIST }, - { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, - { nfserr_nodev, -ENODEV }, - { nfserr_notdir, -ENOTDIR }, - { nfserr_isdir, -EISDIR }, - { nfserr_inval, -EINVAL }, - { nfserr_fbig, -EFBIG }, - { nfserr_nospc, -ENOSPC }, - { nfserr_rofs, -EROFS }, - { nfserr_mlink, -EMLINK }, - { nfserr_nametoolong, -ENAMETOOLONG }, - { nfserr_notempty, -ENOTEMPTY }, -#ifdef EDQUOT - { nfserr_dquot, -EDQUOT }, -#endif - { nfserr_stale, -ESTALE }, - { nfserr_jukebox, -ETIMEDOUT }, - { nfserr_jukebox, -ERESTARTSYS }, - { nfserr_jukebox, -EAGAIN }, - { nfserr_jukebox, -EWOULDBLOCK }, - { nfserr_jukebox, -ENOMEM }, - { nfserr_io, -ETXTBSY }, - { nfserr_notsupp, -EOPNOTSUPP }, - { nfserr_toosmall, -ETOOSMALL }, - { nfserr_serverfault, -ESERVERFAULT }, - { nfserr_serverfault, -ENFILE }, - { nfserr_io, -EUCLEAN }, - { nfserr_perm, -ENOKEY }, - { nfserr_no_grace, 
-ENOGRACE}, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { - if (nfs_errtbl[i].syserr == errno) - return nfs_errtbl[i].nfserr; - } - WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); - return nfserr_io; -} - diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 373695cc62..3d4fd40c98 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -55,18 +56,17 @@ static __be32 nfsd_init_request(struct svc_rqst *, struct svc_process_info *); /* - * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members - * of the svc_serv struct. In particular, ->sv_nrthreads but also to some - * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members + * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks. * * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a - * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number - * of nfsd threads must exist and each must listed in ->sp_all_threads in each - * entry of ->sv_pools[]. + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless + * nn->keep_active is set). That number of nfsd threads must + * exist and each must be listed in ->sp_all_threads in some entry of + * ->sv_pools[]. * - * Transitions of the thread count between zero and non-zero are of particular - * interest since the svc_serv needs to be created and initialized at that - * point, or freed. + * Each active thread holds a counted reference on nn->nfsd_serv, as does + * the nn->keep_active flag and various transient calls to svc_get(). 
* * Finally, the nfsd_mutex also protects some of the global variables that are * accessed when nfsd starts and that are settable via the write_* routines in @@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static const struct svc_version *nfsd_acl_version[] = { +# if defined(CONFIG_NFSD_V2_ACL) [2] = &nfsd_acl_version2, +# endif +# if defined(CONFIG_NFSD_V3_ACL) [3] = &nfsd_acl_version3, +# endif }; #define NFSD_ACL_MINVERS 2 @@ -116,10 +120,10 @@ static struct svc_stat nfsd_acl_svcstats = { #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ static const struct svc_version *nfsd_version[] = { +#if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, -#if defined(CONFIG_NFSD_V3) - [3] = &nfsd_version3, #endif + [3] = &nfsd_version3, #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, #endif @@ -293,13 +297,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred) if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; - error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; @@ -345,33 +349,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn) return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } -void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn) +/** + * nfsd_copy_write_verifier - Atomically copy a write verifier + * @verf: buffer in which to receive the verifier cookie + * @nn: NFS net namespace + * + * This function provides a wait-free mechanism for copying the + * namespace's write verifier without 
tearing it. + */ +void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) { int seq = 0; do { - read_seqbegin_or_lock(&nn->boot_lock, &seq); - /* - * This is opaque to client, so no need to byte-swap. Use - * __force to keep sparse happy. y2038 time_t overflow is - * irrelevant in this usage - */ - verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; - } while (need_seqretry(&nn->boot_lock, seq)); - done_seqretry(&nn->boot_lock, seq); + read_seqbegin_or_lock(&nn->writeverf_lock, &seq); + memcpy(verf, nn->writeverf, sizeof(nn->writeverf)); + } while (need_seqretry(&nn->writeverf_lock, seq)); + done_seqretry(&nn->writeverf_lock, seq); } -static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn) +static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn) { - ktime_get_real_ts64(&nn->nfssvc_boot); + struct timespec64 now; + u64 verf; + + /* + * Because the time value is hashed, y2038 time_t overflow + * is irrelevant in this usage. + */ + ktime_get_raw_ts64(&now); + verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key); + memcpy(nn->writeverf, &verf, sizeof(nn->writeverf)); } -void nfsd_reset_boot_verifier(struct nfsd_net *nn) +/** + * nfsd_reset_write_verifier - Generate a new write verifier + * @nn: NFS net namespace + * + * This function updates the ->writeverf field of @nn. This field + * contains an opaque cookie that, according to Section 18.32.3 of + * RFC 8881, "the client can use to determine whether a server has + * changed instance state (e.g., server restart) between a call to + * WRITE and a subsequent call to either WRITE or COMMIT. This + * cookie MUST be unchanged during a single instance of the NFSv4.1 + * server and MUST be unique between instances of the NFSv4.1 + * server." 
+ */ +void nfsd_reset_write_verifier(struct nfsd_net *nn) { - write_seqlock(&nn->boot_lock); - nfsd_reset_boot_verifier_locked(nn); - write_sequnlock(&nn->boot_lock); + write_seqlock(&nn->writeverf_lock); + nfsd_reset_write_verifier_locked(nn); + write_sequnlock(&nn->writeverf_lock); } static int nfsd_startup_net(struct net *net, const struct cred *cred) @@ -435,6 +463,7 @@ static void nfsd_shutdown_net(struct net *net) nfsd_shutdown_generic(); } +static DEFINE_SPINLOCK(nfsd_notifier_lock); static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -444,18 +473,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); out: return NOTIFY_DONE; @@ -475,10 +503,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; @@ -487,8 +515,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); + out: return 
NOTIFY_DONE; } @@ -501,11 +529,15 @@ static struct notifier_block nfsd_inet6addr_notifier = { /* Only used under nfsd_mutex, so this atomic may be overkill: */ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); -static void nfsd_last_thread(struct svc_serv *serv, struct net *net) +void nfsd_last_thread(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv = nn->nfsd_serv; + + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = NULL; + spin_unlock(&nfsd_notifier_lock); - atomic_dec(&nn->ntf_refcnt); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -513,7 +545,8 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0); + + svc_xprt_destroy_all(serv, net); /* * write_ports can create the server without actually starting @@ -590,24 +623,6 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static const struct svc_serv_ops nfsd_thread_sv_ops = { - .svo_shutdown = nfsd_last_thread, - .svo_function = nfsd, - .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, - .svo_module = THIS_MODULE, -}; - -static void nfsd_complete_shutdown(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - WARN_ON(!mutex_is_locked(&nfsd_mutex)); - - nn->nfsd_serv = NULL; - complete(&nn->nfsd_shutdown_complete); -} - void nfsd_shutdown_threads(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -622,11 +637,10 @@ void nfsd_shutdown_threads(struct net *net) svc_get(serv); /* Kill outstanding nfsd threads */ - serv->sv_ops->svo_setup(serv, NULL, 0); - nfsd_destroy(net); + svc_set_num_threads(serv, NULL, 0); + nfsd_last_thread(net); + svc_put(serv); mutex_unlock(&nfsd_mutex); - /* Wait for shutdown of nfsd_serv to complete */ 
- wait_for_completion(&nn->nfsd_shutdown_complete); } bool i_am_nfsd(void) @@ -638,6 +652,7 @@ int nfsd_create_serv(struct net *net) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nn->nfsd_serv) { @@ -647,19 +662,19 @@ int nfsd_create_serv(struct net *net) if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_thread_sv_ops); - if (nn->nfsd_serv == NULL) + serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd); + if (serv == NULL) return -ENOMEM; - init_completion(&nn->nfsd_shutdown_complete); - nn->nfsd_serv->sv_maxconn = nn->max_connections; - error = svc_bind(nn->nfsd_serv, net); + serv->sv_maxconn = nn->max_connections; + error = svc_bind(serv, net); if (error < 0) { - svc_destroy(nn->nfsd_serv); - nfsd_complete_shutdown(net); + svc_put(serv); return error; } + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = serv; + spin_unlock(&nfsd_notifier_lock); set_max_drc(); /* check if the notifier is already set */ @@ -669,8 +684,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - atomic_inc(&nn->ntf_refcnt); - nfsd_reset_boot_verifier(nn); + nfsd_reset_write_verifier(nn); return 0; } @@ -697,18 +711,6 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) return 0; } -void nfsd_destroy(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - int destroy = (nn->nfsd_serv->sv_nrthreads == 1); - - if (destroy) - svc_shutdown_net(nn->nfsd_serv, net); - svc_destroy(nn->nfsd_serv); - if (destroy) - nfsd_complete_shutdown(net); -} - int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; @@ -733,7 +735,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) if (tot > NFSD_MAXSERVS) { /* total too large: scale down requested numbers */ for (i 
= 0; i < n && tot > 0; i++) { - int new = nthreads[i] * NFSD_MAXSERVS / tot; + int new = nthreads[i] * NFSD_MAXSERVS / tot; tot -= (nthreads[i] - new); nthreads[i] = new; } @@ -753,12 +755,13 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* apply the new numbers */ svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - &nn->nfsd_serv->sv_pools[i], nthreads[i]); + err = svc_set_num_threads(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], + nthreads[i]); if (err) break; } - nfsd_destroy(net); + svc_put(nn->nfsd_serv); return err; } @@ -773,6 +776,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) int error; bool nfsd_up_before; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -784,7 +788,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; - strlcpy(nn->nfsd_name, utsname()->nodename, + strscpy(nn->nfsd_name, utsname()->nodename, sizeof(nn->nfsd_name)); error = nfsd_create_serv(net); @@ -792,24 +796,25 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) goto out; nfsd_up_before = nn->nfsd_net_up; + serv = nn->nfsd_serv; error = nfsd_startup_net(net, cred); if (error) - goto out_destroy; - error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - NULL, nrservs); + goto out_put; + error = svc_set_num_threads(serv, NULL, nrservs); if (error) goto out_shutdown; - /* We are holding a reference to nn->nfsd_serv which - * we don't want to count in the return value, - * so subtract 1 - */ - error = nn->nfsd_serv->sv_nrthreads - 1; + error = serv->sv_nrthreads; + if (error == 0) + nfsd_last_thread(net); out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown_net(net); -out_destroy: - nfsd_destroy(net); /* Release server */ +out_put: + /* Threads now hold service active */ + if (xchg(&nn->keep_active, 0)) + 
svc_put(serv); + svc_put(serv); out: mutex_unlock(&nfsd_mutex); return error; @@ -923,9 +928,6 @@ nfsd(void *vrqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; - /* Lock module and set up kernel thread */ - mutex_lock(&nfsd_mutex); - /* At this point, the thread shares current->fs * with the init process. We need to create files with the * umask as defined by the client instead of init's umask. */ @@ -936,17 +938,7 @@ nfsd(void *vrqstp) current->fs->umask = 0; - /* - * thread is spawned with all signals set to SIG_IGN, re-enable - * the ones that will bring down the thread - */ - allow_signal(SIGKILL); - allow_signal(SIGHUP); - allow_signal(SIGINT); - allow_signal(SIGQUIT); - - nfsdstats.th_cnt++; - mutex_unlock(&nfsd_mutex); + atomic_inc(&nfsdstats.th_cnt); set_freezable(); @@ -970,23 +962,11 @@ nfsd(void *vrqstp) validate_process_creds(); } - /* Clear signals before calling svc_exit_thread() */ - flush_signals(current); - - mutex_lock(&nfsd_mutex); - nfsdstats.th_cnt --; + atomic_dec(&nfsdstats.th_cnt); out: - rqstp->rq_server = NULL; - /* Release the thread */ svc_exit_thread(rqstp); - - nfsd_destroy(net); - - /* Release module */ - mutex_unlock(&nfsd_mutex); - module_put_and_exit(0); return 0; } @@ -1004,9 +984,6 @@ nfsd(void *vrqstp) int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *proc = rqstp->rq_procinfo; - struct kvec *argv = &rqstp->rq_arg.head[0]; - struct kvec *resv = &rqstp->rq_res.head[0]; - __be32 *p; /* * Give the xdr decoder a chance to change this if it wants @@ -1015,7 +992,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_cachetype = proc->pc_cachetype; svcxdr_init_decode(rqstp); - if (!proc->pc_decode(rqstp, argv->iov_base)) + if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; switch (nfsd_cache_lookup(rqstp)) { @@ -1031,14 +1008,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) * Need to grab the location to store the status, as * 
NFSv4 does some encoding while processing */ - p = resv->iov_base + resv->iov_len; svcxdr_init_encode(rqstp); *statp = proc->pc_func(rqstp); - if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) + if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; - if (!proc->pc_encode(rqstp, p)) + if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); @@ -1065,29 +1041,29 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /** * nfssvc_decode_voidarg - Decode void arguments * @rqstp: Server RPC transaction context - * @p: buffer containing arguments to decode + * @xdr: XDR stream positioned at arguments to decode * * Return values: - * %0: Arguments were not valid - * %1: Decoding was successful + * %false: Arguments were not valid + * %true: Decoding was successful */ -int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } /** * nfssvc_encode_voidres - Encode void results * @rqstp: Server RPC transaction context - * @p: buffer in which to encode results + * @xdr: XDR stream into which to encode results * * Return values: - * %0: Local error while encoding - * %1: Encoding was successful + * %false: Local error while encoding + * %true: Encoding was successful */ -int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) @@ -1100,7 +1076,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) mutex_unlock(&nfsd_mutex); return -ENODEV; } - /* bump up the psudo refcount while traversing */ svc_get(nn->nfsd_serv); ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); @@ -1109,12 +1084,12 @@ int nfsd_pool_stats_open(struct inode *inode, struct file 
*file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { + struct seq_file *seq = file->private_data; + struct svc_serv *serv = seq->private; int ret = seq_release(inode, file); - struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); - /* this function really, really should have been called svc_put() */ - nfsd_destroy(net); + svc_put(serv); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 26a42f87c2..caf6355b18 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -64,7 +64,7 @@ svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) if (!p) return false; fh_init(fhp, NFS_FHSIZE); - memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); + memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; return true; @@ -78,7 +78,7 @@ svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp) p = xdr_reserve_space(xdr, NFS_FHSIZE); if (!p) return false; - memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); + memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE); return true; } @@ -272,87 +272,79 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, * XDR decode functions */ -int -nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_fhandle *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->fh); } -int -nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_sattrargs *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->fh) && svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct 
xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_diropargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len); } -int -nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_readargs *args = rqstp->rq_argp; u32 totalcount; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; /* totalcount is ignored */ if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return 0; + return false; - return 1; + return true; } -int -nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_writeargs *args = rqstp->rq_argp; u32 beginoffset, totalcount; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; /* beginoffset is ignored */ if (xdr_stream_decode_u32(xdr, &beginoffset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - return 0; + return false; /* totalcount is ignored */ if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return 0; + return false; /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) - return 0; + return false; if (args->len > NFSSVC_MAXBLKSIZE_V2) - return 0; - if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) - return 0; + return false; - return 1; + return xdr_stream_subsegment(xdr, &args->payload, args->len); } -int -nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_createargs *args = rqstp->rq_argp; return 
svcxdr_decode_diropargs(xdr, &args->fh, @@ -360,10 +352,9 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_renameargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->ffh, @@ -372,10 +363,9 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_linkargs *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->ffh) && @@ -383,178 +373,170 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return 0; + return false; if (args->tlen == 0) - return 0; + return false; args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); args->first.iov_base = xdr_inline_decode(xdr, args->tlen); if (!args->first.iov_base) - return 0; + return false; return svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, 
&args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->cookie) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } /* * XDR encode functions */ -int -nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_stat *resp = rqstp->rq_resp; return svcxdr_encode_stat(xdr, resp->status); } -int -nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fhandle(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (xdr_stream_encode_u32(xdr, 
resp->len) < 0) - return 0; + return false; xdr_write_pages(xdr, &resp->page, 0, resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_statfsres *resp = rqstp->rq_resp; struct kstatfs *stat = &resp->stats; + __be32 *p; if (!svcxdr_encode_stat(xdr, 
resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: p = xdr_reserve_space(xdr, XDR_UNIT * 5); if (!p) - return 0; + return false; *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); *p++ = cpu_to_be32(stat->f_bsize); *p++ = cpu_to_be32(stat->f_blocks); @@ -563,7 +545,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) break; } - return 1; + return true; } /** diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e73bdbb163..e94634d305 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -57,11 +57,11 @@ typedef struct { } stateid_t; typedef struct { - stateid_t stid; + stateid_t cs_stid; #define NFS4_COPY_STID 1 #define NFS4_COPYNOTIFY_STID 2 - unsigned char sc_type; - refcount_t sc_count; + unsigned char cs_type; + refcount_t cs_count; } copy_stateid_t; struct nfsd4_callback { @@ -149,6 +149,7 @@ struct nfs4_delegation { /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; + bool dl_recalled; }; #define cb_to_delegation(cb) \ @@ -174,7 +175,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 /* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 16 +#define NFSD_MAX_OPS_PER_COMPOUND 50 /* Maximum session per slot cache size */ #define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ @@ -282,6 +283,28 @@ struct nfsd4_sessionid { #define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ +/* + * State Meaning Where set + * -------------------------------------------------------------------------- + * | NFSD4_ACTIVE | Confirmed, active | Default | + * |------------------- ----------------------------------------------------| + * | NFSD4_COURTESY | Courtesy state. 
| nfs4_get_client_reaplist | + * | | Lease/lock/share | | + * | | reservation conflict | | + * | | can cause Courtesy | | + * | | client to be expired | | + * |------------------------------------------------------------------------| + * | NFSD4_EXPIRABLE | Courtesy client to be| nfs4_laundromat | + * | | expired by Laundromat| try_to_expire_client | + * | | due to conflict | | + * |------------------------------------------------------------------------| + */ +enum { + NFSD4_ACTIVE = 0, + NFSD4_COURTESY, + NFSD4_EXPIRABLE, +}; + /* * struct nfs4_client - one per client. Clientids live here. * @@ -345,6 +368,7 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) +#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; @@ -385,6 +409,13 @@ struct nfs4_client { struct list_head async_copies; /* list of async copies */ spinlock_t async_lock; /* lock for async copies */ atomic_t cl_cb_inflight; /* Outstanding callbacks */ + + unsigned int cl_state; + atomic_t cl_delegs_in_recall; + + struct nfsd4_cb_recall_any *cl_ra; + time64_t cl_ra_time; + struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset @@ -510,16 +541,13 @@ struct nfs4_clnt_odstate { * inode can have multiple filehandles associated with it, so there is * (potentially) a many to one relationship between this struct and struct * inode. - * - * These are hashed by filehandle in the file_hashtbl, which is protected by - * the global state_lock spinlock. 
*/ struct nfs4_file { refcount_t fi_ref; struct inode * fi_inode; bool fi_aliased; spinlock_t fi_lock; - struct hlist_node fi_hash; /* hash on fi_fhandle */ + struct rhlist_head fi_rlist; struct list_head fi_stateids; union { struct list_head fi_delegations; @@ -568,6 +596,10 @@ struct nfs4_ol_stateid { struct list_head st_locks; struct nfs4_stateowner *st_stateowner; struct nfs4_clnt_odstate *st_clnt_odstate; +/* + * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the + * comment above bmap_to_share_mode() for explanation: + */ unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; @@ -609,6 +641,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_OFFLOAD, NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, + NFSPROC4_CLNT_CB_RECALL_ANY, }; /* Returns true iff a is later than b: */ @@ -629,6 +662,7 @@ struct nfsd4_blocked_lock { struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; + struct kref nbl_kref; }; struct nfsd4_compound_state; @@ -661,18 +695,16 @@ extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); -extern void nfsd4_run_cb(struct nfsd4_callback *cb); +extern bool nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); -extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); void put_nfs4_file(struct nfs4_file *fi); -extern void 
nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, @@ -697,4 +729,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp); extern void nfsd4_record_grace_done(struct nfsd_net *nn); +static inline bool try_to_expire_client(struct nfs4_client *clp) +{ + cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE); + return clp->cl_state == NFSD4_EXPIRABLE; +} #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 1d3b881e73..777e24e5da 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -32,7 +32,7 @@ struct svc_stat nfsd_svcstats = { .program = &nfsd_program, }; -static int nfsd_proc_show(struct seq_file *seq, void *v) +static int nfsd_show(struct seq_file *seq, void *v) { int i; @@ -45,7 +45,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); /* thread usage: */ - seq_printf(seq, "th %u 0", nfsdstats.th_cnt); + seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); /* deprecated thread usage histogram stats */ for (i = 0; i < 10; i++) @@ -72,17 +72,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) return 0; } -static int nfsd_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, nfsd_proc_show, NULL); -} - -static const struct proc_ops nfsd_proc_ops = { - .proc_open = nfsd_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, -}; +DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); int nfsd_percpu_counters_init(struct percpu_counter counters[], int num) { diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index 51ecda852e..9b43dc3d99 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -29,11 +29,9 @@ enum { struct nfsd_stats { struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; - /* 
Protected by nfsd_mutex */ - unsigned int th_cnt; /* number of available threads */ + atomic_t th_cnt; /* number of available threads */ }; - extern struct nfsd_stats nfsdstats; extern struct svc_stat nfsd_svcstats; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 0fc1fa6f28..276420ea3b 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,24 +9,12 @@ #define _NFSD_TRACE_H #include +#include +#include + #include "export.h" #include "nfsfh.h" - -#define NFSD_TRACE_PROC_ARG_FIELDS \ - __field(unsigned int, netns_ino) \ - __field(u32, xid) \ - __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ - __array(unsigned char, client, sizeof(struct sockaddr_in6)) - -#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \ - do { \ - __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ - __entry->xid = be32_to_cpu(rqstp->rq_xid); \ - memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ - rqstp->rq_xprt->xpt_locallen); \ - memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ - rqstp->rq_xprt->xpt_remotelen); \ - } while (0); +#include "xdr4.h" #define NFSD_TRACE_PROC_RES_FIELDS \ __field(unsigned int, netns_ino) \ @@ -46,49 +34,41 @@ rqstp->rq_xprt->xpt_remotelen); \ } while (0); -TRACE_EVENT(nfsd_garbage_args_err, +DECLARE_EVENT_CLASS(nfsd_xdr_err_class, TP_PROTO( const struct svc_rqst *rqstp ), TP_ARGS(rqstp), TP_STRUCT__entry( - NFSD_TRACE_PROC_ARG_FIELDS - + __field(unsigned int, netns_ino) + __field(u32, xid) __field(u32, vers) __field(u32, proc) + __sockaddr(server, rqstp->rq_xprt->xpt_locallen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) ), TP_fast_assign( - NFSD_TRACE_PROC_ARG_ASSIGNMENTS + const struct svc_xprt *xprt = rqstp->rq_xprt; + __entry->netns_ino = xprt->xpt_net->ns.inum; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->vers = rqstp->rq_vers; __entry->proc = rqstp->rq_proc; + __assign_sockaddr(server, &xprt->xpt_local, xprt->xpt_locallen); + __assign_sockaddr(client, &xprt->xpt_remote, xprt->xpt_remotelen); ), TP_printk("xid=0x%08x vers=%u 
proc=%u", __entry->xid, __entry->vers, __entry->proc ) ); -TRACE_EVENT(nfsd_cant_encode_err, - TP_PROTO( - const struct svc_rqst *rqstp - ), - TP_ARGS(rqstp), - TP_STRUCT__entry( - NFSD_TRACE_PROC_ARG_FIELDS - - __field(u32, vers) - __field(u32, proc) - ), - TP_fast_assign( - NFSD_TRACE_PROC_ARG_ASSIGNMENTS +#define DEFINE_NFSD_XDR_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \ + TP_PROTO(const struct svc_rqst *rqstp), \ + TP_ARGS(rqstp)) - __entry->vers = rqstp->rq_vers; - __entry->proc = rqstp->rq_proc; - ), - TP_printk("xid=0x%08x vers=%u proc=%u", - __entry->xid, __entry->vers, __entry->proc - ) -); +DEFINE_NFSD_XDR_ERR_EVENT(garbage_args); +DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); #define show_nfsd_may_flags(x) \ __print_flags(x, "|", \ @@ -107,19 +87,26 @@ TRACE_EVENT(nfsd_cant_encode_err, { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) TRACE_EVENT(nfsd_compound, - TP_PROTO(const struct svc_rqst *rqst, - u32 args_opcnt), - TP_ARGS(rqst, args_opcnt), + TP_PROTO( + const struct svc_rqst *rqst, + const char *tag, + u32 taglen, + u32 opcnt + ), + TP_ARGS(rqst, tag, taglen, opcnt), TP_STRUCT__entry( __field(u32, xid) - __field(u32, args_opcnt) + __field(u32, opcnt) + __string_len(tag, tag, taglen) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->args_opcnt = args_opcnt; + __entry->opcnt = opcnt; + __assign_str_len(tag, tag, taglen); ), - TP_printk("xid=0x%08x opcnt=%u", - __entry->xid, __entry->args_opcnt) + TP_printk("xid=0x%08x opcnt=%u tag=%s", + __entry->xid, __entry->opcnt, __get_str(tag) + ) ) TRACE_EVENT(nfsd_compound_status, @@ -194,6 +181,97 @@ TRACE_EVENT(nfsd_compound_encode_err, __entry->opnum, __entry->status) ); +#define show_fs_file_type(x) \ + __print_symbolic(x, \ + { S_IFLNK, "LNK" }, \ + { S_IFREG, "REG" }, \ + { S_IFDIR, "DIR" }, \ + { S_IFCHR, "CHR" }, \ + { S_IFBLK, "BLK" }, \ + { S_IFIFO, "FIFO" }, \ + { S_IFSOCK, "SOCK" }) + +TRACE_EVENT(nfsd_fh_verify, + TP_PROTO( + const struct svc_rqst *rqstp, + 
const struct svc_fh *fhp, + umode_t type, + int access + ), + TP_ARGS(rqstp, fhp, type, access), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(const void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->inode = d_inode(fhp->fh_dentry); + __entry->type = type; + __entry->access = access; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access) + ) +); + +TRACE_EVENT_CONDITION(nfsd_fh_verify_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + int access, + __be32 error + ), + TP_ARGS(rqstp, fhp, type, access, error), + TP_CONDITION(error), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(const void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + __field(int, error) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + if (fhp->fh_dentry) + __entry->inode = d_inode(fhp->fh_dentry); + else + 
__entry->inode = NULL; + __entry->type = type; + __entry->access = access; + __entry->error = be32_to_cpu(error); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s error=%d", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access), + __entry->error + ) +); DECLARE_EVENT_CLASS(nfsd_fh_err_class, TP_PROTO(struct svc_rqst *rqstp, @@ -412,10 +490,83 @@ TRACE_EVENT(nfsd_dirent, ) ) +DECLARE_EVENT_CLASS(nfsd_copy_err_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *src_fhp, + loff_t src_offset, + struct svc_fh *dst_fhp, + loff_t dst_offset, + u64 count, + int status), + TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, src_fh_hash) + __field(loff_t, src_offset) + __field(u32, dst_fh_hash) + __field(loff_t, dst_offset) + __field(u64, count) + __field(int, status) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle); + __entry->src_offset = src_offset; + __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle); + __entry->dst_offset = dst_offset; + __entry->count = count; + __entry->status = status; + ), + TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld " + "dst_fh_hash=0x%08x dst_offset=%lld " + "count=%llu status=%d", + __entry->xid, __entry->src_fh_hash, __entry->src_offset, + __entry->dst_fh_hash, __entry->dst_offset, + (unsigned long long)__entry->count, + __entry->status) +) + +#define DEFINE_NFSD_COPY_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *src_fhp, \ + loff_t src_offset, \ + struct svc_fh *dst_fhp, \ + loff_t dst_offset, \ + u64 count, \ + int status), \ + TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \ + count, status)) + +DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); + #include "state.h" #include "filecache.h" #include "vfs.h" 
+TRACE_EVENT(nfsd_delegret_wakeup, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + long timeo + ), + TP_ARGS(rqstp, inode, timeo), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(long, timeo) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->timeo = timeo; + ), + TP_printk("xid=0x%08x inode=%p%s", + __entry->xid, __entry->inode, + __entry->timeo == 0 ? " (timed out)" : "" + ) +); + DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), @@ -456,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); +DEFINE_STATEID_EVENT(deleg_return); DEFINE_STATEID_EVENT(deleg_recall); DECLARE_EVENT_CLASS(nfsd_stateseqid_class, @@ -488,6 +640,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(open_confirm); +TRACE_DEFINE_ENUM(NFS4_OPEN_STID); +TRACE_DEFINE_ENUM(NFS4_LOCK_STID); +TRACE_DEFINE_ENUM(NFS4_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); +TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); + +#define show_stid_type(x) \ + __print_flags(x, "|", \ + { NFS4_OPEN_STID, "OPEN" }, \ + { NFS4_LOCK_STID, "LOCK" }, \ + { NFS4_DELEG_STID, "DELEG" }, \ + { NFS4_CLOSED_STID, "CLOSED" }, \ + { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ + { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ + { NFS4_LAYOUT_STID, "LAYOUT" }) + +DECLARE_EVENT_CLASS(nfsd_stid_class, + TP_PROTO( + const struct nfs4_stid *stid + ), + TP_ARGS(stid), + TP_STRUCT__entry( + __field(unsigned long, sc_type) + __field(int, sc_count) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + const stateid_t *stp = &stid->sc_stateid; + + __entry->sc_type = stid->sc_type; + __entry->sc_count = 
refcount_read(&stid->sc_count); + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", + __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation, + __entry->sc_count, show_stid_type(__entry->sc_type) + ) +); + +#define DEFINE_STID_EVENT(name) \ +DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ + TP_PROTO(const struct nfs4_stid *stid), \ + TP_ARGS(stid)) + +DEFINE_STID_EVENT(revoke); + DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), @@ -537,6 +744,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \ DEFINE_NET_EVENT(grace_start); DEFINE_NET_EVENT(grace_complete); +TRACE_EVENT(nfsd_writeverf_reset, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_rqst *rqstp, + int error + ), + TP_ARGS(nn, rqstp, error), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(u32, xid) + __field(int, error) + __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->error = error; + + /* avoid seqlock inside TP_fast_assign */ + memcpy(__entry->verifier, nn->writeverf, + NFS4_VERIFIER_SIZE); + ), + TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s", + __entry->boot_time, __entry->xid, __entry->error, + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + ) +); + TRACE_EVENT(nfsd_clid_cred_mismatch, TP_PROTO( const struct nfs4_client *clp, @@ -606,7 +841,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __array(unsigned char, addr, sizeof(struct sockaddr_in6)) __field(unsigned long, flavor) __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) - __string_len(name, name, clp->cl_name.len) + __string_len(name, clp->cl_name.data, clp->cl_name.len) ), TP_fast_assign( __entry->cl_boot 
= clp->cl_clientid.cl_boot; @@ -640,13 +875,13 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), TP_ARGS(nf), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) __field(int, nf_ref) __field(unsigned long, nf_flags) @@ -654,15 +889,13 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __field(struct file *, nf_file) ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -675,34 +908,60 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); -DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); +DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); + +TRACE_EVENT(nfsd_file_alloc, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_flags = nf->nf_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_may = nf->nf_may; + ), + TP_printk("inode=%p 
ref=%u flags=%s may=%s", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may) + ) +); TRACE_EVENT(nfsd_file_acquire, - TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, - struct inode *inode, unsigned int may_flags, - struct nfsd_file *nf, __be32 status), + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf, + __be32 status + ), - TP_ARGS(rqstp, hash, inode, may_flags, nf, status), + TP_ARGS(rqstp, inode, may_flags, nf, status), TP_STRUCT__entry( __field(u32, xid) - __field(unsigned int, hash) - __field(void *, inode) + __field(const void *, inode) __field(unsigned long, may_flags) - __field(int, nf_ref) + __field(unsigned int, nf_ref) __field(unsigned long, nf_flags) __field(unsigned long, nf_may) - __field(struct file *, nf_file) + __field(const void *, nf_file) __field(u32, status) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? 
refcount_read(&nf->nf_ref) : 0; @@ -712,39 +971,131 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x hash=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", - __entry->xid, __entry->hash, __entry->inode, + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u", + __entry->xid, __entry->inode, show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nfsd_may_flags(__entry->nf_may), - __entry->nf_file, __entry->status) + __entry->nf_file, __entry->status + ) ); -DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO(struct inode *inode, unsigned int hash, int found), - TP_ARGS(inode, hash, found), +TRACE_EVENT(nfsd_file_insert_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + long error + ), + TP_ARGS(rqstp, inode, may_flags, error), TP_STRUCT__entry( - __field(struct inode *, inode) - __field(unsigned int, hash) - __field(int, found) + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(long, error) ), TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->inode = inode; - __entry->hash = hash; - __entry->found = found; + __entry->may_flags = may_flags; + __entry->error = error; ), - TP_printk("hash=0x%x inode=%p found=%d", __entry->hash, - __entry->inode, __entry->found) + TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->error + ) ); -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO(struct inode *inode, unsigned int hash, int found), \ - TP_ARGS(inode, hash, found)) +TRACE_EVENT(nfsd_file_cons_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf + ), + TP_ARGS(rqstp, inode, 
may_flags, nf), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(unsigned int, nf_ref) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(const void *, nf_file) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) +); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), + TP_ARGS(nf, status), + TP_STRUCT__entry( + __field(void *, nf_inode) /* cannot be dereferenced */ + __field(int, nf_ref) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(void *, nf_file) /* cannot be dereferenced */ + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("inode=%p ref=%d flags=%s may=%s file=%p", + __entry->nf_inode, + __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), + __entry->nf_file) +) + +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); 
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + +TRACE_EVENT(nfsd_file_is_cached, + TP_PROTO( + const struct inode *inode, + int found + ), + TP_ARGS(inode, found), + TP_STRUCT__entry( + __field(const struct inode *, inode) + __field(int, found) + ), + TP_fast_assign( + __entry->inode = inode; + __entry->found = found; + ), + TP_printk("inode=%p is %scached", + __entry->inode, + __entry->found ? "" : "not " + ) +); TRACE_EVENT(nfsd_file_fsnotify_handle_event, TP_PROTO(struct inode *inode, u32 mask), @@ -765,6 +1116,91 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +DECLARE_EVENT_CLASS(nfsd_file_gc_class, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(void *, nf_inode) + __field(void *, nf_file) + __field(int, nf_ref) + __field(unsigned long, nf_flags) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + ), + TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + __entry->nf_file + ) +); + +#define DEFINE_NFSD_FILE_GC_EVENT(name) \ +DEFINE_EVENT(nfsd_file_gc_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf \ + ), \ + TP_ARGS(nf)) + +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); + +DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, + TP_PROTO( + unsigned long removed, + unsigned long remaining + ), + TP_ARGS(removed, remaining), + TP_STRUCT__entry( + __field(unsigned long, removed) + __field(unsigned long, remaining) 
+ ), + TP_fast_assign( + __entry->removed = removed; + __entry->remaining = remaining; + ), + TP_printk("%lu entries removed, %lu remaining", + __entry->removed, __entry->remaining) +); + +#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \ +DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ + TP_PROTO( \ + unsigned long removed, \ + unsigned long remaining \ + ), \ + TP_ARGS(removed, remaining)) + +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); + +TRACE_EVENT(nfsd_file_close, + TP_PROTO( + const struct inode *inode + ), + TP_ARGS(inode), + TP_STRUCT__entry( + __field(const void *, inode) + ), + TP_fast_assign( + __entry->inode = inode; + ), + TP_printk("inode=%p", + __entry->inode + ) +); + #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); @@ -1060,6 +1496,92 @@ TRACE_EVENT(nfsd_cb_offload, __entry->fh_hash, __entry->count, __entry->status) ); +TRACE_EVENT(nfsd_cb_recall_any, + TP_PROTO( + const struct nfsd4_cb_recall_any *ra + ), + TP_ARGS(ra), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, keep) + __field(unsigned long, bmval0) + __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot; + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id; + __entry->keep = ra->ra_keep; + __entry->bmval0 = ra->ra_bmval[0]; + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr, + ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen); + ), + TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->keep, show_rca_mask(__entry->bmval0) + ) +); + +DECLARE_EVENT_CLASS(nfsd_cb_done_class, + TP_PROTO( + const stateid_t *stp, + const struct rpc_task *task + ), + TP_ARGS(stp, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + __field(int, status) + ), + TP_fast_assign( + 
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + __entry->status = task->tk_status; + ), + TP_printk("client %08x:%08x stateid %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->si_id, + __entry->si_generation, __entry->status + ) +); + +#define DEFINE_NFSD_CB_DONE_EVENT(name) \ +DEFINE_EVENT(nfsd_cb_done_class, name, \ + TP_PROTO( \ + const stateid_t *stp, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(stp, task)) + +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); + +TRACE_EVENT(nfsd_cb_recall_any_done, + TP_PROTO( + const struct nfsd4_callback *cb, + const struct rpc_task *task + ), + TP_ARGS(cb, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(int, status) + ), + TP_fast_assign( + __entry->status = task->tk_status; + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot; + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->status + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c39b8a6538..0f430548bf 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -32,14 +32,13 @@ #include #include -#ifdef CONFIG_NFSD_V3 #include "xdr3.h" -#endif /* CONFIG_NFSD_V3 */ #ifdef CONFIG_NFSD_V4 #include "../internal.h" #include "acl.h" #include "idmap.h" +#include "xdr4.h" #endif /* CONFIG_NFSD_V4 */ #include "nfsd.h" @@ -49,6 +48,69 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP +/** + * nfserrno - Map Linux errnos to NFS errnos + * @errno: POSIX(-ish) error code to be mapped + * + * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). 
If + * it's an error we don't expect, log it once and return nfserr_io. + */ +__be32 +nfserrno (int errno) +{ + static struct { + __be32 nfserr; + int syserr; + } nfs_errtbl[] = { + { nfs_ok, 0 }, + { nfserr_perm, -EPERM }, + { nfserr_noent, -ENOENT }, + { nfserr_io, -EIO }, + { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, + { nfserr_stale, -EBADF }, + { nfserr_acces, -EACCES }, + { nfserr_exist, -EEXIST }, + { nfserr_xdev, -EXDEV }, + { nfserr_mlink, -EMLINK }, + { nfserr_nodev, -ENODEV }, + { nfserr_notdir, -ENOTDIR }, + { nfserr_isdir, -EISDIR }, + { nfserr_inval, -EINVAL }, + { nfserr_fbig, -EFBIG }, + { nfserr_nospc, -ENOSPC }, + { nfserr_rofs, -EROFS }, + { nfserr_mlink, -EMLINK }, + { nfserr_nametoolong, -ENAMETOOLONG }, + { nfserr_notempty, -ENOTEMPTY }, + { nfserr_dquot, -EDQUOT }, + { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_jukebox, -ERESTARTSYS }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, + { nfserr_jukebox, -ENOMEM }, + { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, + { nfserr_toosmall, -ETOOSMALL }, + { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EREMOTEIO }, + { nfserr_stale, -EOPENSTALE }, + { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, + { nfserr_no_grace, -ENOGRACE}, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { + if (nfs_errtbl[i].syserr == errno) + return nfs_errtbl[i].nfserr; + } + WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. 
@@ -199,27 +261,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - /* - * In the nfsd4_open() case, this may be held across - * subsequent open and delegation acquisition which may - * need to take the child's i_mutex: - */ - fh_lock_nested(fhp, I_MUTEX_PARENT); - dentry = lookup_one_len(name, dparent, len); + dentry = lookup_one_len_unlocked(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; if (nfsd_mountpoint(dentry, exp)) { - /* - * We don't need the i_mutex after all. It's - * still possible we could open this (regular - * files can be mountpoints too), but the - * i_mutex is just there to prevent renames of - * something that we might be about to delegate, - * and a mountpoint won't be renamed: - */ - fh_unlock(fhp); - if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { + host_err = nfsd_cross_mnt(rqstp, &dentry, &exp); + if (host_err) { dput(dentry); goto out_nfserr; } @@ -234,7 +282,15 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfserrno(host_err); } -/* +/** + * nfsd_lookup - look up a single path component for nfsd + * + * @rqstp: the request context + * @fhp: the file handle of the directory + * @name: the component name, or %NULL to look up parent + * @len: length of name to examine + * @resfh: pointer to pre-initialised filehandle to hold result. + * * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put * @@ -244,10 +300,11 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, * returned. Otherwise the covered directory is returned. 
* NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 + * */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, - unsigned int len, struct svc_fh *resfh) + unsigned int len, struct svc_fh *resfh) { struct svc_export *exp; struct dentry *dentry; @@ -305,6 +362,10 @@ commit_metadata(struct svc_fh *fhp) static void nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { + /* Ignore mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; @@ -346,21 +407,77 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfserrno(get_write_access(inode)); } -/* - * Set various file attributes. After this call fhp needs an fh_put. +static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) +{ + int host_err; + + if (iap->ia_valid & ATTR_SIZE) { + /* + * RFC5661, Section 18.30.4: + * Changing the size of a file with SETATTR indirectly + * changes the time_modify and change attributes. + * + * (and similar for the older RFCs) + */ + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; + + if (iap->ia_size < 0) + return -EFBIG; + + host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); + if (host_err) + return host_err; + iap->ia_valid &= ~ATTR_SIZE; + + /* + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. + */ + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + return 0; + } + + if (!iap->ia_valid) + return 0; + + iap->ia_valid |= ATTR_CTIME; + return notify_change(&init_user_ns, dentry, iap, NULL); +} + +/** + * nfsd_setattr - Set various file attributes. 
+ * @rqstp: controlling RPC transaction + * @fhp: filehandle of target + * @attr: attributes to set + * @check_guard: set to 1 if guardtime is a valid timestamp + * @guardtime: do not act if ctime.tv_sec does not match this timestamp + * + * This call may adjust the contents of @attr (in particular, this + * call may change the bits in the na_iattr.ia_valid field). + * + * Returns nfs_ok on success, otherwise an NFS status code is + * returned. Caller must release @fhp by calling fh_put in either + * case. */ __be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_attrs *attr, int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; + struct iattr *iap = attr->na_iattr; int accmode = NFSD_MAY_SATTR; umode_t ftype = 0; __be32 err; - int host_err; + int host_err = 0; bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); + int retries; if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -396,13 +513,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, dentry = fhp->fh_dentry; inode = d_inode(dentry); - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - return 0; - nfsd_sanitize_attrs(inode, iap); if (check_guard && guardtime != inode->i_ctime.tv_sec) @@ -421,49 +531,43 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, return err; } - fh_lock(fhp); - if (size_change) { - /* - * RFC5661, Section 18.30.4: - * Changing the size of a file with SETATTR indirectly - * changes the time_modify and change attributes. 
- * - * (and similar for the older RFCs) - */ - struct iattr size_attr = { - .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, - .ia_size = iap->ia_size, - }; - - host_err = -EFBIG; - if (iap->ia_size < 0) - goto out_unlock; - - host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); - if (host_err) - goto out_unlock; - iap->ia_valid &= ~ATTR_SIZE; + inode_lock(inode); + fh_fill_pre_attrs(fhp); + for (retries = 1;;) { + struct iattr attrs; /* - * Avoid the additional setattr call below if the only other - * attribute that the client sends is the mtime, as we update - * it as part of the size change above. + * notify_change() can alter its iattr argument, making + * @iap unsuitable for submission multiple times. Make a + * copy for every loop iteration. */ - if ((iap->ia_valid & ~ATTR_MTIME) == 0) - goto out_unlock; + attrs = *iap; + host_err = __nfsd_setattr(dentry, &attrs); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, inode)) + break; } - - iap->ia_valid |= ATTR_CTIME; - host_err = notify_change(&init_user_ns, dentry, iap, NULL); - -out_unlock: - fh_unlock(fhp); + if (attr->na_seclabel && attr->na_seclabel->len) + attr->na_labelerr = security_inode_setsecctx(dentry, + attr->na_seclabel->data, attr->na_seclabel->len); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_ACCESS, + attr->na_pacl); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && + !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_DEFAULT, + attr->na_dpacl); + fh_fill_post_attrs(fhp); + inode_unlock(inode); if (size_change) put_write_access(inode); out: if (!host_err) host_err = commit_metadata(fhp); - return nfserrno(host_err); + return err != 0 ? 
err : nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) @@ -495,35 +599,16 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) -{ - __be32 error; - int host_error; - struct dentry *dentry; - error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); - if (error) - return error; - - dentry = fhp->fh_dentry; - - inode_lock(d_inode(dentry)); - host_error = security_inode_setsecctx(dentry, label->data, label->len); - inode_unlock(d_inode(dentry)); - return nfserrno(host_error); -} -#else -__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) +static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp) { - return nfserr_notsupp; + return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate; } -#endif -__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, - struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) +__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, + struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, + u64 count, bool sync) { struct file *src = nf_src->nf_file; struct file *dst = nf_dst->nf_file; @@ -550,8 +635,17 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, if (!status) status = commit_inode_metadata(file_inode(src)); if (status < 0) { - nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, - nfsd_net_id)); + struct nfsd_net *nn = net_generic(nf_dst->nf_net, + nfsd_net_id); + + trace_nfsd_clone_file_range_err(rqstp, + &nfsd4_get_cstate(rqstp)->save_fh, + src_pos, + &nfsd4_get_cstate(rqstp)->current_fh, + dst_pos, + count, status); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, status); ret = nfserrno(status); } } @@ -598,7 +692,6 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif /* 
defined(CONFIG_NFSD_V4) */ -#ifdef CONFIG_NFSD_V3 /* * Check server access rights to a file system object */ @@ -710,7 +803,6 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor out: return error; } -#endif /* CONFIG_NFSD_V3 */ int nfsd_open_break_lease(struct inode *inode, int access) { @@ -743,9 +835,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, path.dentry = fhp->fh_dentry; inode = d_inode(path.dentry); - /* Disallow write access to files with the append-only bit set - * or any access when mandatory locking enabled - */ err = nfserr_perm; if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; @@ -793,6 +882,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { __be32 err; + bool retried = false; validate_process_creds(); /* @@ -808,21 +898,37 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, */ if (type == S_IFREG) may_flags |= NFSD_MAY_OWNER_OVERRIDE; +retry: err = fh_verify(rqstp, fhp, type, may_flags); - if (!err) + if (!err) { err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + if (err == nfserr_stale && !retried) { + retried = true; + fh_put(fhp); + goto retry; + } + } validate_process_creds(); return err; } +/** + * nfsd_open_verified - Open a regular file for the filecache + * @rqstp: RPC request + * @fhp: NFS filehandle of the file to open + * @may_flags: internal permission flags + * @filp: OUT: open "struct file *" + * + * Returns an nfsstat value in network byte order. 
+ */ __be32 -nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, - int may_flags, struct file **filp) +nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, + struct file **filp) { __be32 err; validate_process_creds(); - err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); validate_process_creds(); return err; } @@ -837,17 +943,23 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct svc_rqst *rqstp = sd->u.data; - struct page **pp = rqstp->rq_next_page; - struct page *page = buf->page; + struct page *page = buf->page; // may be a compound one + unsigned offset = buf->offset; + struct page *last_page; - if (rqstp->rq_res.page_len == 0) { - svc_rqst_replace_page(rqstp, page); - rqstp->rq_res.page_base = buf->offset; - } else if (page != pp[-1]) { + last_page = page + (offset + sd->len - 1) / PAGE_SIZE; + for (page += offset / PAGE_SIZE; page <= last_page; page++) { + /* + * Skip page replacement when extending the contents + * of the current page. 
+ */ + if (page == *(rqstp->rq_next_page - 1)) + continue; svc_rqst_replace_page(rqstp, page); } + if (rqstp->rq_res.page_len == 0) // first call + rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; - return sd->len; } @@ -960,6 +1072,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, unsigned long *cnt, int stable, __be32 *verf) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; struct super_block *sb = file_inode(file)->i_sb; struct svc_export *exp; @@ -1003,25 +1116,14 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); since = READ_ONCE(file->f_wb_err); - if (flags & RWF_SYNC) { - if (verf) - nfsd_copy_boot_verifier(verf, - net_generic(SVC_NET(rqstp), - nfsd_net_id)); - host_err = vfs_iter_write(file, &iter, &pos, flags); - if (host_err < 0) - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); - } else { - if (verf) - nfsd_copy_boot_verifier(verf, - net_generic(SVC_NET(rqstp), - nfsd_net_id)); - host_err = vfs_iter_write(file, &iter, &pos, flags); - } + if (verf) + nfsd_copy_write_verifier(verf, nn); + file_start_write(file); + host_err = vfs_iter_write(file, &iter, &pos, flags); + file_end_write(file); if (host_err < 0) { - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, host_err); goto out_nfserr; } *cnt = host_err; @@ -1033,9 +1135,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (stable && use_wgather) { host_err = wait_for_concurrent_writes(file); - if (host_err < 0) - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); + if (host_err < 0) { + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, host_err); + } } out_nfserr: @@ -1065,7 +1168,7 @@ __be32 nfsd_read(struct svc_rqst 
*rqstp, struct svc_fh *fhp, __be32 err; trace_nfsd_read_start(rqstp, fhp, offset, *count); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; @@ -1097,7 +1200,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, trace_nfsd_write_start(rqstp, fhp, offset, *cnt); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); if (err) goto out; @@ -1109,11 +1212,11 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, return err; } -#ifdef CONFIG_NFSD_V3 /** * nfsd_commit - Commit pending writes to stable storage * @rqstp: RPC request being processed * @fhp: NFS filehandle + * @nf: target file * @offset: raw offset from beginning of file * @count: raw count of bytes to sync * @verf: filled in with the server's current write verifier @@ -1130,19 +1233,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, * An nfsstat value in network byte order. 
*/ __be32 -nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, - u32 count, __be32 *verf) +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, + u64 offset, u32 count, __be32 *verf) { + __be32 err = nfs_ok; u64 maxbytes; loff_t start, end; struct nfsd_net *nn; - struct nfsd_file *nf; - __be32 err; - - err = nfsd_file_acquire(rqstp, fhp, - NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); - if (err) - goto out; /* * Convert the client-provided (offset, count) range to a @@ -1167,7 +1264,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, err2 = vfs_fsync_range(nf->nf_file, start, end, 0); switch (err2) { case 0: - nfsd_copy_boot_verifier(verf, nn); + nfsd_copy_write_verifier(verf, nn); err2 = filemap_check_wb_err(nf->nf_file->f_mapping, since); err = nfserrno(err2); @@ -1176,26 +1273,37 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, err = nfserr_notsupp; break; default: - nfsd_reset_boot_verifier(nn); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, err2); err = nfserrno(err2); } } else - nfsd_copy_boot_verifier(verf, nn); + nfsd_copy_write_verifier(verf, nn); - nfsd_file_put(nf); -out: return err; } -#endif /* CONFIG_NFSD_V3 */ -static __be32 -nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, - struct iattr *iap) +/** + * nfsd_create_setattr - Set a created file's attributes + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of parent directory + * @resfhp: NFS filehandle of new object + * @attrs: requested attributes of new object + * + * Returns nfs_ok on success, or an nfsstat in network byte order. + */ +__be32 +nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd_attrs *attrs) { + struct iattr *iap = attrs->na_iattr; + __be32 status; + /* - * Mode has already been set earlier in create: + * Mode has already been set by file creation. 
*/ iap->ia_valid &= ~ATTR_MODE; + /* * Setting uid/gid works only for root. Irix appears to * send along the gid on create when it tries to implement @@ -1203,10 +1311,31 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, */ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); + + /* + * Callers expect new file metadata to be committed even + * if the attributes have not changed. + */ if (iap->ia_valid) - return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); - /* Callers expect file metadata to be committed here */ - return nfserrno(commit_metadata(resfhp)); + status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); + else + status = nfserrno(commit_metadata(resfhp)); + + /* + * Transactional filesystems had a chance to commit changes + * for both parent and child simultaneously making the + * following commit_metadata a noop in many cases. + */ + if (!status) + status = nfserrno(commit_metadata(fhp)); + + /* + * Update the new filehandle to pick up the new attributes. + */ + if (!status) + status = fh_update(resfhp); + + return status; } /* HPUX client sometimes creates a file in mode 000, and sets size to 0. 
@@ -1227,26 +1356,19 @@ nfsd_check_ignore_resizing(struct iattr *iap) /* The parent directory should already be locked: */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild; struct inode *dirp; + struct iattr *iap = attrs->na_iattr; __be32 err; - __be32 err2; int host_err; dentry = fhp->fh_dentry; dirp = d_inode(dentry); dchild = dget(resfhp->fh_dentry); - if (!fhp->fh_locked) { - WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n", - dentry); - err = nfserr_io; - goto out; - } - err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); if (err) goto out; @@ -1259,7 +1381,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); err = 0; - host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); @@ -1306,22 +1427,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err < 0) goto out_nfserr; - err = nfsd_create_setattr(rqstp, resfhp, iap); + err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); - /* - * nfsd_create_setattr already committed the child. Transactional - * filesystems had a chance to commit changes for both parent and - * child simultaneously making the following commit_metadata a - * noop. - */ - err2 = nfserrno(commit_metadata(fhp)); - if (err2) - err = err2; - /* - * Update the file handle to get the new inode info. 
- */ - if (!err) - err = fh_update(resfhp); out: dput(dchild); return err; @@ -1339,8 +1446,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, */ __be32 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; __be32 err; @@ -1359,11 +1466,13 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - fh_lock_nested(fhp, I_MUTEX_PARENT); + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - return nfserrno(host_err); + if (IS_ERR(dchild)) { + err = nfserrno(host_err); + goto out_unlock; + } err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); /* * We unconditionally drop our ref to dchild as fh_compose will have @@ -1371,178 +1480,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ dput(dchild); if (err) - return err; - return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, - rdev, resfhp); -} - -#ifdef CONFIG_NFSD_V3 - -/* - * NFSv3 and NFSv4 version of nfsd_create - */ -__be32 -do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - struct svc_fh *resfhp, int createmode, u32 *verifier, - bool *truncp, bool *created) -{ - struct dentry *dentry, *dchild = NULL; - struct inode *dirp; - __be32 err; - int host_err; - __u32 v_mtime=0, v_atime=0; - - err = nfserr_perm; - if (!flen) - goto out; - err = nfserr_exist; - if (isdotent(fname, flen)) - goto out; - if (!(iap->ia_valid & ATTR_MODE)) - iap->ia_mode = 0; - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (err) - goto out; - - dentry = fhp->fh_dentry; - dirp = d_inode(dentry); - - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - - 
fh_lock_nested(fhp, I_MUTEX_PARENT); - - /* - * Compose the response file handle. - */ - dchild = lookup_one_len(fname, dentry, flen); - host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; - - /* If file doesn't exist, check for permissions to create one */ - if (d_really_is_negative(dchild)) { - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); - if (err) - goto out; - } - - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; - - if (nfsd_create_is_exclusive(createmode)) { - /* solaris7 gets confused (bugid 4218508) if these have - * the high bit set, so just clear the high bits. If this is - * ever changed to use different attrs for storing the - * verifier, then do_open_lookup() will also need to be fixed - * accordingly. - */ - v_mtime = verifier[0]&0x7fffffff; - v_atime = verifier[1]&0x7fffffff; - } - - if (d_really_is_positive(dchild)) { - err = 0; - - switch (createmode) { - case NFS3_CREATE_UNCHECKED: - if (! d_is_reg(dchild)) - goto out; - else if (truncp) { - /* in nfsv4, we need to treat this case a little - * differently. we don't want to truncate the - * file now; this would be wrong if the OPEN - * fails for some other reason. furthermore, - * if the size is nonzero, we should ignore it - * according to spec! 
- */ - *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size; - } - else { - iap->ia_valid &= ATTR_SIZE; - goto set_attr; - } - break; - case NFS3_CREATE_EXCLUSIVE: - if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime - && d_inode(dchild)->i_atime.tv_sec == v_atime - && d_inode(dchild)->i_size == 0 ) { - if (created) - *created = true; - break; - } - fallthrough; - case NFS4_CREATE_EXCLUSIVE4_1: - if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime - && d_inode(dchild)->i_atime.tv_sec == v_atime - && d_inode(dchild)->i_size == 0 ) { - if (created) - *created = true; - goto set_attr; - } - fallthrough; - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } - fh_drop_write(fhp); - goto out; - } - - if (!IS_POSIXACL(dirp)) - iap->ia_mode &= ~current_umask(); - - host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); - if (host_err < 0) { - fh_drop_write(fhp); - goto out_nfserr; - } - if (created) - *created = true; - - nfsd_check_ignore_resizing(iap); - - if (nfsd_create_is_exclusive(createmode)) { - /* Cram the verifier into atime/mtime */ - iap->ia_valid = ATTR_MTIME|ATTR_ATIME - | ATTR_MTIME_SET|ATTR_ATIME_SET; - /* XXX someone who knows this better please fix it for nsec */ - iap->ia_mtime.tv_sec = v_mtime; - iap->ia_atime.tv_sec = v_atime; - iap->ia_mtime.tv_nsec = 0; - iap->ia_atime.tv_nsec = 0; - } - - set_attr: - err = nfsd_create_setattr(rqstp, resfhp, iap); - - /* - * nfsd_create_setattr already committed the child - * (and possibly also the parent). - */ - if (!err) - err = nfserrno(commit_metadata(fhp)); - - /* - * Update the filehandle to get the new inode info. 
- */ - if (!err) - err = fh_update(resfhp); - - out: - fh_unlock(fhp); - if (dchild && !IS_ERR(dchild)) - dput(dchild); - fh_drop_write(fhp); - return err; - - out_nfserr: - err = nfserrno(host_err); - goto out; + goto out_unlock; + fh_fill_pre_attrs(fhp); + err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); + fh_fill_post_attrs(fhp); +out_unlock: + inode_unlock(dentry->d_inode); + return err; } -#endif /* CONFIG_NFSD_V3 */ /* * Read a symlink. On entry, *lenp must contain the maximum path length that @@ -1582,15 +1527,25 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) return 0; } -/* - * Create a symlink and look up its inode +/** + * nfsd_symlink - Create a symlink and look up its inode + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of parent directory + * @fname: filename of the new symlink + * @flen: length of @fname + * @path: content of the new symlink (NUL-terminated) + * @attrs: requested attributes of new object + * @resfhp: NFS filehandle of new object + * * N.B. After this call _both_ fhp and resfhp need an fh_put + * + * Returns nfs_ok on success, or an nfsstat in network byte order. 
*/ __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, - char *path, - struct svc_fh *resfhp) + char *fname, int flen, + char *path, struct nfsd_attrs *attrs, + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; @@ -1608,33 +1563,35 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; + if (host_err) { + err = nfserrno(host_err); + goto out; + } - fh_lock(fhp); dentry = fhp->fh_dentry; + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dnew = lookup_one_len(fname, dentry, flen); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + inode_unlock(dentry->d_inode); + goto out_drop_write; + } + fh_fill_pre_attrs(fhp); host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); err = nfserrno(host_err); - fh_unlock(fhp); + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); + if (!err) + nfsd_create_setattr(rqstp, fhp, resfhp, attrs); + fh_fill_post_attrs(fhp); + inode_unlock(dentry->d_inode); if (!err) err = nfserrno(commit_metadata(fhp)); - - fh_drop_write(fhp); - - cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); dput(dnew); if (err==0) err = cerr; +out_drop_write: + fh_drop_write(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } /* @@ -1672,22 +1629,25 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, goto out; } - fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = d_inode(ddir); + inode_lock_nested(dirp, I_MUTEX_PARENT); dnew = lookup_one_len(name, ddir, len); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + goto out_unlock; + } dold = tfhp->fh_dentry; err = nfserr_noent; if (d_really_is_negative(dold)) goto out_dput; + fh_fill_pre_attrs(ffhp); host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL); - 
fh_unlock(ffhp); + fh_fill_post_attrs(ffhp); + inode_unlock(dirp); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1698,17 +1658,17 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } -out_dput: dput(dnew); -out_unlock: - fh_unlock(ffhp); +out_drop_write: fh_drop_write(tfhp); out: return err; -out_nfserr: - err = nfserrno(host_err); - goto out_unlock; +out_dput: + dput(dnew); +out_unlock: + inode_unlock(dirp); + goto out_drop_write; } static void @@ -1775,12 +1735,9 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out; } - /* cannot use fh_lock as we need deadlock protective ordering - * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = true; - fill_pre_wcc(ffhp); - fill_pre_wcc(tfhp); + fh_fill_pre_attrs(ffhp); + fh_fill_pre_attrs(tfhp); odentry = lookup_one_len(fname, fdentry, flen); host_err = PTR_ERR(odentry); @@ -1815,7 +1772,15 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, .new_dir = tdir, .new_dentry = ndentry, }; - host_err = vfs_rename(&rd); + int retries; + + for (retries = 1;;) { + host_err = vfs_rename(&rd); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry))) + break; + } if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1828,17 +1793,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, dput(odentry); out_nfserr: err = nfserrno(host_err); - /* - * We cannot rely on fh_unlock on the two filehandles, - * as that would do the wrong thing if the two directories - * were the same, so again we do it by hand. 
- */ + if (!close_cached) { - fill_post_wcc(ffhp); - fill_post_wcc(tfhp); + fh_fill_post_attrs(ffhp); + fh_fill_post_attrs(tfhp); } unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); /* @@ -1882,19 +1842,19 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_nfserr; - fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = d_inode(dentry); + inode_lock_nested(dirp, I_MUTEX_PARENT); rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_drop_write; + goto out_unlock; if (d_really_is_negative(rdentry)) { dput(rdentry); host_err = -ENOENT; - goto out_drop_write; + goto out_unlock; } rinode = d_inode(rdentry); ihold(rinode); @@ -1902,15 +1862,26 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; + fh_fill_pre_attrs(fhp); if (type != S_IFDIR) { + int retries; + if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) nfsd_close_cached_files(rdentry); - host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL); + + for (retries = 1;;) { + host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, rinode)) + break; + } } else { host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); } + fh_fill_post_attrs(fhp); - fh_unlock(fhp); + inode_unlock(dirp); if (!host_err) host_err = commit_metadata(fhp); dput(rdentry); @@ -1932,6 +1903,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } out: return err; +out_unlock: + inode_unlock(dirp); + goto out_drop_write; } /* @@ -2285,13 +2259,16 @@ nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp, return err; } -/* - * Removexattr and setxattr need to call fh_lock to both lock the inode - * and set the change attribute. 
Since the top-level vfs_removexattr - * and vfs_setxattr calls already do their own inode_lock calls, call - * the _locked variant. Pass in a NULL pointer for delegated_inode, - * and let the client deal with NFS4ERR_DELAY (same as with e.g. - * setattr and remove). +/** + * nfsd_removexattr - Remove an extended attribute + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of object with xattr to remove + * @name: name of xattr to remove (NUL-terminate) + * + * Pass in a NULL pointer for delegated_inode, and let the client deal + * with NFS4ERR_DELAY (same as with e.g. setattr and remove). + * + * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) @@ -2307,12 +2284,14 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) if (ret) return nfserrno(ret); - fh_lock(fhp); + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry, name, NULL); - fh_unlock(fhp); + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); return nfsd_xattr_errno(ret); @@ -2332,12 +2311,13 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, ret = fh_want_write(fhp); if (ret) return nfserrno(ret); - fh_lock(fhp); + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf, len, flags, NULL); - - fh_unlock(fhp); + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); return nfsd_xattr_errno(ret); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 3cf5a8a13d..dbdfef7ae8 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,6 +6,8 @@ #ifndef LINUX_NFSD_VFS_H #define LINUX_NFSD_VFS_H +#include +#include #include "nfsfh.h" #include "nfsd.h" @@ -42,6 +44,23 @@ struct nfsd_file; typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, 
unsigned); /* nfsd/vfs.c */ +struct nfsd_attrs { + struct iattr *na_iattr; /* input */ + struct xdr_netobj *na_seclabel; /* input */ + struct posix_acl *na_pacl; /* input */ + struct posix_acl *na_dpacl; /* input */ + + int na_labelerr; /* output */ + int na_aclerr; /* output */ +}; + +static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) +{ + posix_acl_release(attrs->na_pacl); + posix_acl_release(attrs->na_dpacl); +} + +__be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, @@ -50,32 +69,28 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time64_t); + struct nfsd_attrs *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 -__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, - struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); -__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, +__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, + struct nfsd_file *nf_src, u64 src_pos, struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, - int type, dev_t rdev, struct svc_fh *res); + struct nfsd_attrs *attrs, int type, dev_t rdev, + struct svc_fh *res); __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, + char *name, int len, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *res); -#ifdef CONFIG_NFSD_V3 __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int 
len, struct iattr *attrs, - struct svc_fh *res, int createmode, - u32 *verifier, bool *truncp, bool *created); +__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd_attrs *iap); __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, - u64 offset, u32 count, __be32 *verf); -#endif /* CONFIG_NFSD_V3 */ + struct nfsd_file *nf, u64 offset, u32 count, + __be32 *verf); #ifdef CONFIG_NFSD_V4 __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, void **bufp, int *lenp); @@ -89,7 +104,7 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); -__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t, +__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, int, struct file **); __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, @@ -113,8 +128,9 @@ __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, - struct svc_fh *res); + char *name, int len, char *path, + struct nfsd_attrs *attrs, + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); ssize_t nfsd_copy_file_range(struct file *, u64, @@ -160,10 +176,4 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) AT_STATX_SYNC_AS_STAT)); } -static inline int nfsd_create_is_exclusive(int createmode) -{ - return createmode == NFS3_CREATE_EXCLUSIVE - || createmode == NFS4_CREATE_EXCLUSIVE4_1; -} - #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 863a35f249..852f71580b 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -141,23 +141,24 @@ union nfsd_xdrstore { 
#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -int nfssvc_decode_fhandleargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_createargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfssvc_encode_statres(struct svc_rqst *, __be32 *); -int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *); -int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readres(struct svc_rqst *, __be32 *); -int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); +bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct 
xdr_stream *xdr); +bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); int nfssvc_encode_entry(void *data, const char *name, int namlen, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 712c117300..03fe4e2130 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -265,36 +265,37 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -int nfs3svc_decode_fhandleargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); -int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); -int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *); -int 
nfs3svc_encode_readres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_createres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *); -int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *); +bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool 
nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 45257666a6..510978e602 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -273,11 +273,13 @@ struct nfsd4_open { bool op_truncate; /* used during processing */ bool op_created; /* used during processing */ struct nfs4_openowner *op_openowner; /* used during processing */ + struct file *op_filp; /* used during processing */ struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_clnt_odstate *op_odstate; /* used during processing */ struct nfs4_acl *op_acl; struct xdr_netobj op_label; + struct svc_rqst *op_rqstp; }; struct nfsd4_open_confirm { @@ -301,9 +303,10 @@ struct nfsd4_read { u32 rd_length; /* request */ int rd_vlen; struct nfsd_file *rd_nf; - + struct svc_rqst *rd_rqstp; /* response */ - struct svc_fh *rd_fhp; /* response 
*/ + struct svc_fh *rd_fhp; /* response */ + u32 rd_eof; /* response */ }; struct nfsd4_readdir { @@ -531,6 +534,13 @@ struct nfsd42_write_res { stateid_t cb_stateid; }; +struct nfsd4_cb_offload { + struct nfsd4_callback co_cb; + struct nfsd42_write_res co_res; + __be32 co_nfserr; + struct knfsd_fh co_fh; +}; + struct nfsd4_copy { /* request */ stateid_t cp_src_stateid; @@ -538,18 +548,16 @@ struct nfsd4_copy { u64 cp_src_pos; u64 cp_dst_pos; u64 cp_count; - struct nl4_server cp_src; - bool cp_intra; + struct nl4_server *cp_src; - /* both */ - u32 cp_synchronous; + unsigned long cp_flags; +#define NFSD4_COPY_F_STOPPED (0) +#define NFSD4_COPY_F_INTRA (1) +#define NFSD4_COPY_F_SYNCHRONOUS (2) +#define NFSD4_COPY_F_COMMITTED (3) /* response */ struct nfsd42_write_res cp_res; - - /* for cb_offload */ - struct nfsd4_callback cp_cb; - __be32 nfserr; struct knfsd_fh fh; struct nfs4_client *cp_clp; @@ -562,14 +570,35 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; - bool stopped; - struct vfsmount *ss_mnt; + struct nfsd4_ssc_umount_item *ss_nsui; struct nfs_fh c_fh; nfs4_stateid stateid; - bool committed; }; +static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync) +{ + if (sync) + set_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); + else + clear_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy) +{ + return test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_copy_is_async(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); +} + struct nfsd4_seek { /* request */ stateid_t seek_stateid; @@ -593,19 +622,20 @@ struct nfsd4_offload_status { struct nfsd4_copy_notify { /* request */ stateid_t cpn_src_stateid; - struct nl4_server cpn_dst; + struct 
nl4_server *cpn_dst; /* response */ stateid_t cpn_cnr_stateid; u64 cpn_sec; u32 cpn_nsec; - struct nl4_server cpn_src; + struct nl4_server *cpn_src; }; struct nfsd4_op { u32 opnum; - const struct nfsd4_operation * opdesc; __be32 status; + const struct nfsd4_operation *opdesc; + struct nfs4_replay *replay; union nfsd4_op_u { struct nfsd4_access access; struct nfsd4_close close; @@ -669,7 +699,6 @@ struct nfsd4_op { struct nfsd4_listxattrs listxattrs; struct nfsd4_removexattr removexattr; } u; - struct nfs4_replay * replay; }; bool nfsd4_cache_this_op(struct nfsd4_op *); @@ -695,7 +724,6 @@ struct nfsd4_compoundargs { u32 opcnt; struct nfsd4_op *ops; struct nfsd4_op iops[8]; - int cachetype; }; struct nfsd4_compoundres { @@ -703,10 +731,11 @@ struct nfsd4_compoundres { struct xdr_stream *xdr; struct svc_rqst * rqstp; - u32 taglen; + __be32 *statusp; char * tag; + u32 taglen; u32 opcnt; - __be32 * tagp; /* tag, opcount encode location */ + struct nfsd4_compound_state cstate; }; @@ -757,8 +786,8 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); -int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *); -int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); +bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); @@ -859,12 +888,18 @@ struct nfsd4_operation { u32 op_flags; char *op_name; /* Try to get response size before operation */ - u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); + u32 (*op_rsize_bop)(const struct svc_rqst *rqstp, + const struct nfsd4_op *op); void (*op_get_currentstateid)(struct nfsd4_compound_state *, union 
nfsd4_op_u *); void (*op_set_currentstateid)(struct nfsd4_compound_state *, union nfsd4_op_u *); }; +struct nfsd4_cb_recall_any { + struct nfsd4_callback ra_cb; + u32 ra_keep; + u32 ra_bmval[1]; +}; #endif diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 547cf07cf4..0d39af1b00 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -48,3 +48,9 @@ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + 1 + 1) +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index def9121a46..385ec71fcd 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr = blocknr; } cnt = 1; @@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - goto out; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) @@ -781,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, out: nilfs_btree_free_path(path); return ret; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 81394e22d0..eb7de9e2a3 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -243,7 +243,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = { #define S_SHIFT 12 static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { +nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, [S_IFCHR >> 
S_SHIFT] = NILFS_FT_CHRDEV, diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index f353101955..7faf8c285d 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr = blocknr; } @@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) - return ret; + goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt) @@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, } *ptrp = ptr; return cnt; + + dat_error: + if (ret == -ENOENT) + ret = -EINVAL; /* Notify bmap layer of metadata corruption */ + return ret; } static __u64 diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 324e23236c..475fd522c7 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -112,7 +112,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", __func__, inode->i_ino, (unsigned long long)blkoff); - err = 0; + err = -EAGAIN; } nilfs_transaction_abort(inode->i_sb); goto out; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index a39206705d..6a2f779e0b 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -60,7 +60,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_nmembs == 0) return 0; - if (argv->v_size > PAGE_SIZE) + if ((size_t)argv->v_size > PAGE_SIZE) return -EINVAL; /* diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 17ff9589c4..1a5f2daa4a 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2159,8 +2159,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) { spin_lock(&sci->sc_state_lock); if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { - sci->sc_timer.expires = jiffies + 
sci->sc_interval; - add_timer(&sci->sc_timer); + if (sci->sc_task) { + sci->sc_timer.expires = jiffies + sci->sc_interval; + add_timer(&sci->sc_timer); + } sci->sc_state |= NILFS_SEGCTOR_COMMIT; } spin_unlock(&sci->sc_state_lock); @@ -2207,19 +2209,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) struct nilfs_segctor_wait_request wait_req; int err = 0; - spin_lock(&sci->sc_state_lock); init_wait(&wait_req.wq); wait_req.err = 0; atomic_set(&wait_req.done, 0); + init_waitqueue_entry(&wait_req.wq, current); + + /* + * To prevent a race issue where completion notifications from the + * log writer thread are missed, increment the request sequence count + * "sc_seq_request" and insert a wait queue entry using the current + * sequence number into the "sc_wait_request" queue at the same time + * within the lock section of "sc_state_lock". + */ + spin_lock(&sci->sc_state_lock); wait_req.seq = ++sci->sc_seq_request; + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); spin_unlock(&sci->sc_state_lock); - init_waitqueue_entry(&wait_req.wq, current); - add_wait_queue(&sci->sc_wait_request, &wait_req.wq); - set_current_state(TASK_INTERRUPTIBLE); wake_up(&sci->sc_wait_daemon); for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Synchronize only while the log writer thread is alive. + * Leave flushing out after the log writer thread exits to + * the cleanup work in nilfs_segctor_destroy(). 
+ */ + if (!sci->sc_task) + break; + if (atomic_read(&wait_req.done)) { err = wait_req.err; break; @@ -2235,7 +2254,7 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) return err; } -static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force) { struct nilfs_segctor_wait_request *wrq, *n; unsigned long flags; @@ -2243,7 +2262,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) spin_lock_irqsave(&sci->sc_wait_request.lock, flags); list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && - nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) { wrq->err = err; atomic_set(&wrq->done, 1); } @@ -2363,10 +2382,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, */ static void nilfs_segctor_accept(struct nilfs_sc_info *sci) { + bool thread_is_alive; + spin_lock(&sci->sc_state_lock); sci->sc_seq_accepted = sci->sc_seq_request; + thread_is_alive = (bool)sci->sc_task; spin_unlock(&sci->sc_state_lock); - del_timer_sync(&sci->sc_timer); + + /* + * This function does not race with the log writer thread's + * termination. Therefore, deleting sc_timer, which should not be + * done after the log writer thread exits, can be done safely outside + * the area protected by sc_state_lock. 
+ */ + if (thread_is_alive) + del_timer_sync(&sci->sc_timer); } /** @@ -2383,7 +2413,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) if (mode == SC_LSEG_SR) { sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; sci->sc_seq_done = sci->sc_seq_accepted; - nilfs_segctor_wakeup(sci, err); + nilfs_segctor_wakeup(sci, err, false); sci->sc_flush_request = 0; } else { if (mode == SC_FLUSH_FILE) @@ -2392,7 +2422,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) sci->sc_flush_request &= ~FLUSH_DAT_BIT; /* re-enable timer if checkpoint creation was not done */ - if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task && time_before(jiffies, sci->sc_timer.expires)) add_timer(&sci->sc_timer); } @@ -2582,6 +2612,7 @@ static int nilfs_segctor_thread(void *arg) int timeout = 0; sci->sc_timer_task = current; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); /* start sync. */ sci->sc_task = current; @@ -2648,6 +2679,7 @@ static int nilfs_segctor_thread(void *arg) end_thread: /* end sync. 
*/ sci->sc_task = NULL; + del_timer_sync(&sci->sc_timer); wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ spin_unlock(&sci->sc_state_lock); return 0; @@ -2711,7 +2743,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_iput_queue); INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; @@ -2765,6 +2796,13 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + /* + * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can + * be called from delayed iput() via nilfs_evict_inode() and can race + * with the above log writer thread termination. + */ + nilfs_segctor_wakeup(sci, 0, true); + if (flush_work(&sci->sc_iput_work)) flag = true; @@ -2790,7 +2828,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) down_write(&nilfs->ns_segctor_sem); - del_timer_sync(&sci->sc_timer); kfree(sci); } diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index e85e13c50d..fa81c59a2a 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -150,7 +150,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); - mutex_lock(&dnotify_group->mark_mutex); + fsnotify_group_lock(dnotify_group); spin_lock(&fsn_mark->lock); prev = &dn_mark->dn; @@ -173,7 +173,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) free = true; } - mutex_unlock(&dnotify_group->mark_mutex); + fsnotify_group_unlock(dnotify_group); if (free) fsnotify_free_mark(fsn_mark); @@ -196,7 +196,7 @@ static __u32 convert_arg(unsigned long arg) if (arg & DN_ATTRIB) new_mask |= FS_ATTRIB; if (arg & DN_RENAME) - new_mask |= FS_DN_RENAME; + 
new_mask |= FS_RENAME; if (arg & DN_CREATE) new_mask |= (FS_CREATE | FS_MOVED_TO); @@ -306,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) new_dn_mark->dn = NULL; /* this is needed to prevent the fcntl/close race described below */ - mutex_lock(&dnotify_group->mark_mutex); + fsnotify_group_lock(dnotify_group); /* add the new_fsn_mark or find an old one. */ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); @@ -316,7 +316,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) } else { error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0); if (error) { - mutex_unlock(&dnotify_group->mark_mutex); + fsnotify_group_unlock(dnotify_group); goto out_err; } spin_lock(&new_fsn_mark->lock); @@ -365,7 +365,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) if (destroy) fsnotify_detach_mark(fsn_mark); - mutex_unlock(&dnotify_group->mark_mutex); + fsnotify_group_unlock(dnotify_group); if (destroy) fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); @@ -383,7 +383,8 @@ static int __init dnotify_init(void) SLAB_PANIC|SLAB_ACCOUNT); dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops); + dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, + FSNOTIFY_GROUP_NOFS); if (IS_ERR(dnotify_group)) panic("unable to allocate fsnotify group for dnotify\n"); return 0; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 057abd2cf8..a2a15bc4df 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -18,7 +18,7 @@ #include "fanotify.h" -static bool fanotify_path_equal(struct path *p1, struct path *p2) +static bool fanotify_path_equal(const struct path *p1, const struct path *p2) { return p1->mnt == p2->mnt && p1->dentry == p2->dentry; } @@ -76,8 +76,10 @@ static bool fanotify_info_equal(struct fanotify_info *info1, struct fanotify_info *info2) { if 
(info1->dir_fh_totlen != info2->dir_fh_totlen || + info1->dir2_fh_totlen != info2->dir2_fh_totlen || info1->file_fh_totlen != info2->file_fh_totlen || - info1->name_len != info2->name_len) + info1->name_len != info2->name_len || + info1->name2_len != info2->name2_len) return false; if (info1->dir_fh_totlen && @@ -85,14 +87,24 @@ static bool fanotify_info_equal(struct fanotify_info *info1, fanotify_info_dir_fh(info2))) return false; + if (info1->dir2_fh_totlen && + !fanotify_fh_equal(fanotify_info_dir2_fh(info1), + fanotify_info_dir2_fh(info2))) + return false; + if (info1->file_fh_totlen && !fanotify_fh_equal(fanotify_info_file_fh(info1), fanotify_info_file_fh(info2))) return false; - return !info1->name_len || - !memcmp(fanotify_info_name(info1), fanotify_info_name(info2), - info1->name_len); + if (info1->name_len && + memcmp(fanotify_info_name(info1), fanotify_info_name(info2), + info1->name_len)) + return false; + + return !info1->name2_len || + !memcmp(fanotify_info_name2(info1), fanotify_info_name2(info2), + info1->name2_len); } static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, @@ -111,6 +123,16 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, return fanotify_info_equal(info1, info2); } +static bool fanotify_error_event_equal(struct fanotify_error_event *fee1, + struct fanotify_error_event *fee2) +{ + /* Error events against the same file system are always merged. */ + if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid)) + return false; + + return true; +} + static bool fanotify_should_merge(struct fanotify_event *old, struct fanotify_event *new) { @@ -131,6 +153,13 @@ static bool fanotify_should_merge(struct fanotify_event *old, if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) return false; + /* + * FAN_RENAME event is reported with special info record types, + * so we cannot merge it with other events. 
+ */ + if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME)) + return false; + switch (old->type) { case FANOTIFY_EVENT_TYPE_PATH: return fanotify_path_equal(fanotify_event_path(old), @@ -141,6 +170,9 @@ static bool fanotify_should_merge(struct fanotify_event *old, case FANOTIFY_EVENT_TYPE_FID_NAME: return fanotify_name_event_equal(FANOTIFY_NE(old), FANOTIFY_NE(new)); + case FANOTIFY_EVENT_TYPE_FS_ERROR: + return fanotify_error_event_equal(FANOTIFY_EE(old), + FANOTIFY_EE(new)); default: WARN_ON_ONCE(1); } @@ -176,6 +208,10 @@ static int fanotify_merge(struct fsnotify_group *group, break; if (fanotify_should_merge(old, new)) { old->mask |= new->mask; + + if (fanotify_is_error_event(old->mask)) + FANOTIFY_EE(old)->err_count++; + return 1; } } @@ -255,15 +291,17 @@ static int fanotify_get_response(struct fsnotify_group *group, */ static u32 fanotify_group_event_mask(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info, - u32 event_mask, const void *data, - int data_type, struct inode *dir) + u32 *match_mask, u32 event_mask, + const void *data, int data_type, + struct inode *dir) { - __u32 marks_mask = 0, marks_ignored_mask = 0; + __u32 marks_mask = 0, marks_ignore_mask = 0; __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS | FANOTIFY_EVENT_FLAGS; const struct path *path = fsnotify_data_path(data, data_type); unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); struct fsnotify_mark *mark; + bool ondir = event_mask & FAN_ONDIR; int type; pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", @@ -278,37 +316,30 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, return 0; } else if (!(fid_mode & FAN_REPORT_FID)) { /* Do we have a directory inode to report? 
*/ - if (!dir && !(event_mask & FS_ISDIR)) + if (!dir && !ondir) return 0; } - fsnotify_foreach_obj_type(type) { - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - mark = iter_info->marks[type]; - - /* Apply ignore mask regardless of ISDIR and ON_CHILD flags */ - marks_ignored_mask |= mark->ignored_mask; - + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { /* - * If the event is on dir and this mark doesn't care about - * events on dir, don't send it! + * Apply ignore mask depending on event flags in ignore mask. */ - if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR)) - continue; + marks_ignore_mask |= + fsnotify_effective_ignore_mask(mark, ondir, type); /* - * If the event is on a child and this mark is on a parent not - * watching children, don't send it! + * Send the event depending on event flags in mark mask. */ - if (type == FSNOTIFY_OBJ_TYPE_PARENT && - !(mark->mask & FS_EVENT_ON_CHILD)) + if (!fsnotify_mask_applicable(mark->mask, ondir, type)) continue; marks_mask |= mark->mask; + + /* Record the mark types of this group that matched the event */ + *match_mask |= 1U << type; } - test_mask = event_mask & marks_mask & ~marks_ignored_mask; + test_mask = event_mask & marks_mask & ~marks_ignore_mask; /* * For dirent modification events (create/delete/move) that do not carry @@ -343,13 +374,23 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, static int fanotify_encode_fh_len(struct inode *inode) { int dwords = 0; + int fh_len; if (!inode) return 0; exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); + fh_len = dwords << 2; - return dwords << 2; + /* + * struct fanotify_error_event might be preallocated and is + * limited to MAX_HANDLE_SZ. This should never happen, but + * safeguard by forcing an invalid file handle. 
+ */ + if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ)) + return 0; + + return fh_len; } /* @@ -370,15 +411,21 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, fh->type = FILEID_ROOT; fh->len = 0; fh->flags = 0; + + /* + * Invalid FHs are used by FAN_FS_ERROR for errors not + * linked to any inode. The f_handle won't be reported + * back to userspace. + */ if (!inode) - return 0; + goto out; /* * !gpf means preallocated variable size fh, but fh_len could * be zero in that case if encoding fh len failed. */ err = -ENOENT; - if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4)) + if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ) goto out_err; /* No external buffer in a variable size allocated fh */ @@ -403,8 +450,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, fh->type = type; fh->len = fh_len; - /* Mix fh into event merge key */ - *hash ^= fanotify_hash_fh(fh); +out: + /* + * Mix fh into event merge key. Hash might be NULL in case of + * unhashed FID events (i.e. FAN_FS_ERROR). + */ + if (hash) + *hash ^= fanotify_hash_fh(fh); return FANOTIFY_FH_HDR_LEN + fh_len; @@ -420,17 +472,41 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, } /* - * The inode to use as identifier when reporting fid depends on the event. - * Report the modified directory inode on dirent modification events. - * Report the "victim" inode otherwise. + * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for + * some events and the fid of the parent for create/delete/move events. + * + * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported + * also in create/delete/move events in addition to the fid of the parent + * and the name of the child. 
+ */ +static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask) +{ + if (mask & ALL_FSNOTIFY_DIRENT_EVENTS) + return (fid_mode & FAN_REPORT_TARGET_FID); + + return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR); +} + +/* + * The inode to use as identifier when reporting fid depends on the event + * and the group flags. + * + * With the group flag FAN_REPORT_TARGET_FID, always report the child fid. + * + * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory + * fid on dirent events and the child fid otherwise. + * * For example: - * FS_ATTRIB reports the child inode even if reported on a watched parent. - * FS_CREATE reports the modified dir inode and not the created inode. + * FS_ATTRIB reports the child fid even if reported on a watched parent. + * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID. + * and reports the created child fid with FAN_REPORT_TARGET_FID. */ static struct inode *fanotify_fid_inode(u32 event_mask, const void *data, - int data_type, struct inode *dir) + int data_type, struct inode *dir, + unsigned int fid_mode) { - if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) + if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) && + !(fid_mode & FAN_REPORT_TARGET_FID)) return dir; return fsnotify_data_inode(data, data_type); @@ -452,7 +528,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) return dir; - if (S_ISDIR(inode->i_mode)) + if (inode && S_ISDIR(inode->i_mode)) return inode; return dir; @@ -514,25 +590,34 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, return &ffe->fae; } -static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, +static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, __kernel_fsid_t *fsid, const struct qstr *name, struct inode *child, + struct dentry *moved, unsigned int *hash, gfp_t gfp) { struct fanotify_name_event *fne; struct 
fanotify_info *info; struct fanotify_fh *dfh, *ffh; - unsigned int dir_fh_len = fanotify_encode_fh_len(id); + struct inode *dir2 = moved ? d_inode(moved->d_parent) : NULL; + const struct qstr *name2 = moved ? &moved->d_name : NULL; + unsigned int dir_fh_len = fanotify_encode_fh_len(dir); + unsigned int dir2_fh_len = fanotify_encode_fh_len(dir2); unsigned int child_fh_len = fanotify_encode_fh_len(child); - unsigned int size; - - size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len; + unsigned long name_len = name ? name->len : 0; + unsigned long name2_len = name2 ? name2->len : 0; + unsigned int len, size; + + /* Reserve terminating null byte even for empty name */ + size = sizeof(*fne) + name_len + name2_len + 2; + if (dir_fh_len) + size += FANOTIFY_FH_HDR_LEN + dir_fh_len; + if (dir2_fh_len) + size += FANOTIFY_FH_HDR_LEN + dir2_fh_len; if (child_fh_len) size += FANOTIFY_FH_HDR_LEN + child_fh_len; - if (name) - size += name->len + 1; fne = kmalloc(size, gfp); if (!fne) return NULL; @@ -542,40 +627,97 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, *hash ^= fanotify_hash_fsid(fsid); info = &fne->info; fanotify_info_init(info); - dfh = fanotify_info_dir_fh(info); - info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, hash, 0); + if (dir_fh_len) { + dfh = fanotify_info_dir_fh(info); + len = fanotify_encode_fh(dfh, dir, dir_fh_len, hash, 0); + fanotify_info_set_dir_fh(info, len); + } + if (dir2_fh_len) { + dfh = fanotify_info_dir2_fh(info); + len = fanotify_encode_fh(dfh, dir2, dir2_fh_len, hash, 0); + fanotify_info_set_dir2_fh(info, len); + } if (child_fh_len) { ffh = fanotify_info_file_fh(info); - info->file_fh_totlen = fanotify_encode_fh(ffh, child, - child_fh_len, hash, 0); + len = fanotify_encode_fh(ffh, child, child_fh_len, hash, 0); + fanotify_info_set_file_fh(info, len); } - if (name) { - long salt = name->len; - + if (name_len) { fanotify_info_copy_name(info, name); - *hash ^= full_name_hash((void *)salt, name->name, 
name->len); + *hash ^= full_name_hash((void *)name_len, name->name, name_len); + } + if (name2_len) { + fanotify_info_copy_name2(info, name2); + *hash ^= full_name_hash((void *)name2_len, name2->name, + name2_len); } - pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", - __func__, id->i_ino, size, dir_fh_len, child_fh_len, + pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", + __func__, size, dir_fh_len, child_fh_len, info->name_len, info->name_len, fanotify_info_name(info)); + if (dir2_fh_len) { + pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n", + __func__, dir2_fh_len, info->name2_len, + info->name2_len, fanotify_info_name2(info)); + } + return &fne->fae; } -static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, - u32 mask, const void *data, - int data_type, struct inode *dir, - const struct qstr *file_name, - __kernel_fsid_t *fsid) +static struct fanotify_event *fanotify_alloc_error_event( + struct fsnotify_group *group, + __kernel_fsid_t *fsid, + const void *data, int data_type, + unsigned int *hash) +{ + struct fs_error_report *report = + fsnotify_data_error_report(data, data_type); + struct inode *inode; + struct fanotify_error_event *fee; + int fh_len; + + if (WARN_ON_ONCE(!report)) + return NULL; + + fee = mempool_alloc(&group->fanotify_data.error_events_pool, GFP_NOFS); + if (!fee) + return NULL; + + fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR; + fee->error = report->error; + fee->err_count = 1; + fee->fsid = *fsid; + + inode = report->inode; + fh_len = fanotify_encode_fh_len(inode); + + /* Bad fh_len. Fallback to using an invalid fh. Should never happen. 
*/ + if (!fh_len && inode) + inode = NULL; + + fanotify_encode_fh(&fee->object_fh, inode, fh_len, NULL, 0); + + *hash ^= fanotify_hash_fsid(fsid); + + return &fee->fae; +} + +static struct fanotify_event *fanotify_alloc_event( + struct fsnotify_group *group, + u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *file_name, + __kernel_fsid_t *fsid, u32 match_mask) { struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; - struct inode *id = fanotify_fid_inode(mask, data, data_type, dir); + unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); + struct inode *id = fanotify_fid_inode(mask, data, data_type, dir, + fid_mode); struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); - unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); struct mem_cgroup *old_memcg; + struct dentry *moved = NULL; struct inode *child = NULL; bool name_event = false; unsigned int hash = 0; @@ -584,11 +726,10 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { /* - * With both flags FAN_REPORT_DIR_FID and FAN_REPORT_FID, we - * report the child fid for events reported on a non-dir child + * For certain events and group flags, report the child fid * in addition to reporting the parent fid and maybe child name. */ - if ((fid_mode & FAN_REPORT_FID) && id != dirid && !ondir) + if (fanotify_report_child_fid(fid_mode, mask) && id != dirid) child = id; id = dirid; @@ -612,6 +753,38 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) { name_event = true; } + + /* + * In the special case of FAN_RENAME event, use the match_mask + * to determine if we need to report only the old parent+name, + * only the new parent+name or both. 
+ * 'dirid' and 'file_name' are the old parent+name and + * 'moved' has the new parent+name. + */ + if (mask & FAN_RENAME) { + bool report_old, report_new; + + if (WARN_ON_ONCE(!match_mask)) + return NULL; + + /* Report both old and new parent+name if sb watching */ + report_old = report_new = + match_mask & (1U << FSNOTIFY_ITER_TYPE_SB); + report_old |= + match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE); + report_new |= + match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2); + + if (!report_old) { + /* Do not report old parent+name */ + dirid = NULL; + file_name = NULL; + } + if (report_new) { + /* Report new parent+name */ + moved = fsnotify_data_dentry(data, data_type); + } + } } /* @@ -630,9 +803,12 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, if (fanotify_is_perm_event(mask)) { event = fanotify_alloc_perm_event(path, gfp); - } else if (name_event && (file_name || child)) { - event = fanotify_alloc_name_event(id, fsid, file_name, child, - &hash, gfp); + } else if (fanotify_is_error_event(mask)) { + event = fanotify_alloc_error_event(group, fsid, data, + data_type, &hash); + } else if (name_event && (file_name || moved || child)) { + event = fanotify_alloc_name_event(dirid, fsid, file_name, child, + moved, &hash, gfp); } else if (fid_mode) { event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); } else { @@ -664,16 +840,14 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, */ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *mark; int type; __kernel_fsid_t fsid = {}; - fsnotify_foreach_obj_type(type) { + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { struct fsnotify_mark_connector *conn; - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - - conn = READ_ONCE(iter_info->marks[type]->connector); + conn = READ_ONCE(mark->connector); /* Mark is just getting destroyed or created? 
*/ if (!conn) continue; @@ -702,6 +876,9 @@ static void fanotify_insert_event(struct fsnotify_group *group, assert_spin_locked(&group->notification_lock); + if (!fanotify_is_hashed_event(event->mask)) + return; + pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, group, event, bucket); @@ -718,6 +895,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, struct fanotify_event *event; struct fsnotify_event *fsn_event; __kernel_fsid_t fsid = {}; + u32 match_mask = 0; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); @@ -738,15 +916,18 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); + BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); + BUILD_BUG_ON(FAN_RENAME != FS_RENAME); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); - mask = fanotify_group_event_mask(group, iter_info, mask, data, - data_type, dir); + mask = fanotify_group_event_mask(group, iter_info, &match_mask, + mask, data, data_type, dir); if (!mask) return 0; - pr_debug("%s: group=%p mask=%x\n", __func__, group, mask); + pr_debug("%s: group=%p mask=%x report_mask=%x\n", __func__, + group, mask, match_mask); if (fanotify_is_perm_event(mask)) { /* @@ -765,7 +946,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, } event = fanotify_alloc_event(group, mask, data, data_type, dir, - file_name, &fsid); + file_name, &fsid, match_mask); ret = -ENOMEM; if (unlikely(!event)) { /* @@ -778,9 +959,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, } fsn_event = &event->fse; - ret = fsnotify_add_event(group, fsn_event, fanotify_merge, - fanotify_is_hashed_event(mask) ? 
- fanotify_insert_event : NULL); + ret = fsnotify_insert_event(group, fsn_event, fanotify_merge, + fanotify_insert_event); if (ret) { /* Permission events shouldn't be merged */ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); @@ -805,6 +985,9 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) if (group->fanotify_data.ucounts) dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_GROUPS); + + if (mempool_initialized(&group->fanotify_data.error_events_pool)) + mempool_exit(&group->fanotify_data.error_events_pool); } static void fanotify_free_path_event(struct fanotify_event *event) @@ -833,7 +1016,16 @@ static void fanotify_free_name_event(struct fanotify_event *event) kfree(FANOTIFY_NE(event)); } -static void fanotify_free_event(struct fsnotify_event *fsn_event) +static void fanotify_free_error_event(struct fsnotify_group *group, + struct fanotify_event *event) +{ + struct fanotify_error_event *fee = FANOTIFY_EE(event); + + mempool_free(fee, &group->fanotify_data.error_events_pool); +} + +static void fanotify_free_event(struct fsnotify_group *group, + struct fsnotify_event *fsn_event) { struct fanotify_event *event; @@ -855,6 +1047,9 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) case FANOTIFY_EVENT_TYPE_OVERFLOW: kfree(event); break; + case FANOTIFY_EVENT_TYPE_FS_ERROR: + fanotify_free_error_event(group, event); + break; default: WARN_ON_ONCE(1); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4a5e555dc3..57f51a9a30 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -40,15 +40,45 @@ struct fanotify_fh { struct fanotify_info { /* size of dir_fh/file_fh including fanotify_fh hdr size */ u8 dir_fh_totlen; + u8 dir2_fh_totlen; u8 file_fh_totlen; u8 name_len; - u8 pad; + u8 name2_len; + u8 pad[3]; unsigned char buf[]; /* * (struct fanotify_fh) dir_fh starts at buf[0] - * (optional) file_fh starts at buf[dir_fh_totlen] - * name starts at buf[dir_fh_totlen + 
file_fh_totlen] + * (optional) dir2_fh starts at buf[dir_fh_totlen] + * (optional) file_fh starts at buf[dir_fh_totlen + dir2_fh_totlen] + * name starts at buf[dir_fh_totlen + dir2_fh_totlen + file_fh_totlen] + * ... */ +#define FANOTIFY_DIR_FH_SIZE(info) ((info)->dir_fh_totlen) +#define FANOTIFY_DIR2_FH_SIZE(info) ((info)->dir2_fh_totlen) +#define FANOTIFY_FILE_FH_SIZE(info) ((info)->file_fh_totlen) +#define FANOTIFY_NAME_SIZE(info) ((info)->name_len + 1) +#define FANOTIFY_NAME2_SIZE(info) ((info)->name2_len + 1) + +#define FANOTIFY_DIR_FH_OFFSET(info) 0 +#define FANOTIFY_DIR2_FH_OFFSET(info) \ + (FANOTIFY_DIR_FH_OFFSET(info) + FANOTIFY_DIR_FH_SIZE(info)) +#define FANOTIFY_FILE_FH_OFFSET(info) \ + (FANOTIFY_DIR2_FH_OFFSET(info) + FANOTIFY_DIR2_FH_SIZE(info)) +#define FANOTIFY_NAME_OFFSET(info) \ + (FANOTIFY_FILE_FH_OFFSET(info) + FANOTIFY_FILE_FH_SIZE(info)) +#define FANOTIFY_NAME2_OFFSET(info) \ + (FANOTIFY_NAME_OFFSET(info) + FANOTIFY_NAME_SIZE(info)) + +#define FANOTIFY_DIR_FH_BUF(info) \ + ((info)->buf + FANOTIFY_DIR_FH_OFFSET(info)) +#define FANOTIFY_DIR2_FH_BUF(info) \ + ((info)->buf + FANOTIFY_DIR2_FH_OFFSET(info)) +#define FANOTIFY_FILE_FH_BUF(info) \ + ((info)->buf + FANOTIFY_FILE_FH_OFFSET(info)) +#define FANOTIFY_NAME_BUF(info) \ + ((info)->buf + FANOTIFY_NAME_OFFSET(info)) +#define FANOTIFY_NAME2_BUF(info) \ + ((info)->buf + FANOTIFY_NAME2_OFFSET(info)) } __aligned(4); static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh) @@ -87,7 +117,21 @@ static inline struct fanotify_fh *fanotify_info_dir_fh(struct fanotify_info *inf { BUILD_BUG_ON(offsetof(struct fanotify_info, buf) % 4); - return (struct fanotify_fh *)info->buf; + return (struct fanotify_fh *)FANOTIFY_DIR_FH_BUF(info); +} + +static inline int fanotify_info_dir2_fh_len(struct fanotify_info *info) +{ + if (!info->dir2_fh_totlen || + WARN_ON_ONCE(info->dir2_fh_totlen < FANOTIFY_FH_HDR_LEN)) + return 0; + + return info->dir2_fh_totlen - FANOTIFY_FH_HDR_LEN; +} + +static inline struct 
fanotify_fh *fanotify_info_dir2_fh(struct fanotify_info *info) +{ + return (struct fanotify_fh *)FANOTIFY_DIR2_FH_BUF(info); } static inline int fanotify_info_file_fh_len(struct fanotify_info *info) @@ -101,32 +145,90 @@ static inline int fanotify_info_file_fh_len(struct fanotify_info *info) static inline struct fanotify_fh *fanotify_info_file_fh(struct fanotify_info *info) { - return (struct fanotify_fh *)(info->buf + info->dir_fh_totlen); + return (struct fanotify_fh *)FANOTIFY_FILE_FH_BUF(info); +} + +static inline char *fanotify_info_name(struct fanotify_info *info) +{ + if (!info->name_len) + return NULL; + + return FANOTIFY_NAME_BUF(info); } -static inline const char *fanotify_info_name(struct fanotify_info *info) +static inline char *fanotify_info_name2(struct fanotify_info *info) { - return info->buf + info->dir_fh_totlen + info->file_fh_totlen; + if (!info->name2_len) + return NULL; + + return FANOTIFY_NAME2_BUF(info); } static inline void fanotify_info_init(struct fanotify_info *info) { + BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN + MAX_HANDLE_SZ > U8_MAX); + BUILD_BUG_ON(NAME_MAX > U8_MAX); + info->dir_fh_totlen = 0; + info->dir2_fh_totlen = 0; info->file_fh_totlen = 0; info->name_len = 0; + info->name2_len = 0; +} + +/* These set/copy helpers MUST be called by order */ +static inline void fanotify_info_set_dir_fh(struct fanotify_info *info, + unsigned int totlen) +{ + if (WARN_ON_ONCE(info->dir2_fh_totlen > 0) || + WARN_ON_ONCE(info->file_fh_totlen > 0) || + WARN_ON_ONCE(info->name_len > 0) || + WARN_ON_ONCE(info->name2_len > 0)) + return; + + info->dir_fh_totlen = totlen; +} + +static inline void fanotify_info_set_dir2_fh(struct fanotify_info *info, + unsigned int totlen) +{ + if (WARN_ON_ONCE(info->file_fh_totlen > 0) || + WARN_ON_ONCE(info->name_len > 0) || + WARN_ON_ONCE(info->name2_len > 0)) + return; + + info->dir2_fh_totlen = totlen; } -static inline unsigned int fanotify_info_len(struct fanotify_info *info) +static inline void 
fanotify_info_set_file_fh(struct fanotify_info *info, + unsigned int totlen) { - return info->dir_fh_totlen + info->file_fh_totlen + info->name_len; + if (WARN_ON_ONCE(info->name_len > 0) || + WARN_ON_ONCE(info->name2_len > 0)) + return; + + info->file_fh_totlen = totlen; } static inline void fanotify_info_copy_name(struct fanotify_info *info, const struct qstr *name) { + if (WARN_ON_ONCE(name->len > NAME_MAX) || + WARN_ON_ONCE(info->name2_len > 0)) + return; + info->name_len = name->len; - strcpy(info->buf + info->dir_fh_totlen + info->file_fh_totlen, - name->name); + strcpy(fanotify_info_name(info), name->name); +} + +static inline void fanotify_info_copy_name2(struct fanotify_info *info, + const struct qstr *name) +{ + if (WARN_ON_ONCE(name->len > NAME_MAX)) + return; + + info->name2_len = name->len; + strcpy(fanotify_info_name2(info), name->name); } /* @@ -141,6 +243,7 @@ enum fanotify_event_type { FANOTIFY_EVENT_TYPE_PATH, FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ + FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ __FANOTIFY_EVENT_TYPE_NUM }; @@ -170,12 +273,18 @@ static inline void fanotify_init_event(struct fanotify_event *event, event->pid = NULL; } +#define FANOTIFY_INLINE_FH(name, size) \ +struct { \ + struct fanotify_fh (name); \ + /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ + unsigned char _inline_fh_buf[(size)]; \ +} + struct fanotify_fid_event { struct fanotify_event fae; __kernel_fsid_t fsid; - struct fanotify_fh object_fh; - /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ - unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; + + FANOTIFY_INLINE_FH(object_fh, FANOTIFY_INLINE_FH_LEN); }; static inline struct fanotify_fid_event * @@ -196,12 +305,30 @@ FANOTIFY_NE(struct fanotify_event *event) return container_of(event, struct fanotify_name_event, fae); } +struct fanotify_error_event { + struct fanotify_event fae; + s32 error; /* Error reported 
by the Filesystem. */ + u32 err_count; /* Suppressed errors count */ + + __kernel_fsid_t fsid; /* FSID this error refers to. */ + + FANOTIFY_INLINE_FH(object_fh, MAX_HANDLE_SZ); +}; + +static inline struct fanotify_error_event * +FANOTIFY_EE(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_error_event, fae); +} + static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_FID) return &FANOTIFY_FE(event)->fsid; else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) return &FANOTIFY_NE(event)->fsid; + else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) + return &FANOTIFY_EE(event)->fsid; else return NULL; } @@ -213,6 +340,8 @@ static inline struct fanotify_fh *fanotify_event_object_fh( return &FANOTIFY_FE(event)->object_fh; else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) return fanotify_info_file_fh(&FANOTIFY_NE(event)->info); + else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) + return &FANOTIFY_EE(event)->object_fh; else return NULL; } @@ -244,6 +373,37 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event) return info ? fanotify_info_dir_fh_len(info) : 0; } +static inline int fanotify_event_dir2_fh_len(struct fanotify_event *event) +{ + struct fanotify_info *info = fanotify_event_info(event); + + return info ? fanotify_info_dir2_fh_len(info) : 0; +} + +static inline bool fanotify_event_has_object_fh(struct fanotify_event *event) +{ + /* For error events, even zeroed fh are reported. 
*/ + if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) + return true; + return fanotify_event_object_fh_len(event) > 0; +} + +static inline bool fanotify_event_has_dir_fh(struct fanotify_event *event) +{ + return fanotify_event_dir_fh_len(event) > 0; +} + +static inline bool fanotify_event_has_dir2_fh(struct fanotify_event *event) +{ + return fanotify_event_dir2_fh_len(event) > 0; +} + +static inline bool fanotify_event_has_any_dir_fh(struct fanotify_event *event) +{ + return fanotify_event_has_dir_fh(event) || + fanotify_event_has_dir2_fh(event); +} + struct fanotify_path_event { struct fanotify_event fae; struct path path; @@ -287,13 +447,12 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event, fse); } -static inline bool fanotify_event_has_path(struct fanotify_event *event) +static inline bool fanotify_is_error_event(u32 mask) { - return event->type == FANOTIFY_EVENT_TYPE_PATH || - event->type == FANOTIFY_EVENT_TYPE_PATH_PERM; + return mask & FAN_FS_ERROR; } -static inline struct path *fanotify_event_path(struct fanotify_event *event) +static inline const struct path *fanotify_event_path(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_PATH) return &FANOTIFY_PE(event)->path; @@ -315,7 +474,8 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event) */ static inline bool fanotify_is_hashed_event(u32 mask) { - return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW); + return !(fanotify_is_perm_event(mask) || + fsnotify_is_overflow_event(mask)); } static inline unsigned int fanotify_event_hash_bucket( @@ -324,3 +484,17 @@ static inline unsigned int fanotify_event_hash_bucket( { return event->hash & FANOTIFY_HTABLE_MASK; } + +static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) +{ + unsigned int mflags = 0; + + if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) + mflags |= FAN_MARK_IGNORED_SURV_MODIFY; + if 
(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF) + mflags |= FAN_MARK_EVICTABLE; + if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) + mflags |= FAN_MARK_IGNORE; + + return mflags; +} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 0e2a0eb7cb..d93418f213 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -30,6 +30,7 @@ #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192 #define FANOTIFY_DEFAULT_MAX_GROUPS 128 +#define FANOTIFY_DEFAULT_FEE_POOL_SIZE 32 /* * Legacy fanotify marks limits (8192) is per group and we introduced a tunable @@ -114,6 +115,8 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly; (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) #define FANOTIFY_PIDFD_INFO_HDR_LEN \ sizeof(struct fanotify_event_info_pidfd) +#define FANOTIFY_ERROR_INFO_LEN \ + (sizeof(struct fanotify_event_info_error)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -126,17 +129,42 @@ static int fanotify_fid_info_len(int fh_len, int name_len) FANOTIFY_EVENT_ALIGN); } -static int fanotify_event_info_len(unsigned int info_mode, - struct fanotify_event *event) +/* FAN_RENAME may have one or two dir+name info records */ +static int fanotify_dir_name_info_len(struct fanotify_event *event) { struct fanotify_info *info = fanotify_event_info(event); int dir_fh_len = fanotify_event_dir_fh_len(event); - int fh_len = fanotify_event_object_fh_len(event); + int dir2_fh_len = fanotify_event_dir2_fh_len(event); int info_len = 0; + + if (dir_fh_len) + info_len += fanotify_fid_info_len(dir_fh_len, + info->name_len); + if (dir2_fh_len) + info_len += fanotify_fid_info_len(dir2_fh_len, + info->name2_len); + + return info_len; +} + +static size_t fanotify_event_len(unsigned int info_mode, + struct fanotify_event *event) +{ + size_t event_len = FAN_EVENT_METADATA_LEN; + struct fanotify_info *info; + int fh_len; int dot_len = 0; - if 
(dir_fh_len) { - info_len += fanotify_fid_info_len(dir_fh_len, info->name_len); + if (!info_mode) + return event_len; + + if (fanotify_is_error_event(event->mask)) + event_len += FANOTIFY_ERROR_INFO_LEN; + + info = fanotify_event_info(event); + + if (fanotify_event_has_any_dir_fh(event)) { + event_len += fanotify_dir_name_info_len(event); } else if ((info_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) { /* @@ -147,12 +175,14 @@ static int fanotify_event_info_len(unsigned int info_mode, } if (info_mode & FAN_REPORT_PIDFD) - info_len += FANOTIFY_PIDFD_INFO_HDR_LEN; + event_len += FANOTIFY_PIDFD_INFO_HDR_LEN; - if (fh_len) - info_len += fanotify_fid_info_len(fh_len, dot_len); + if (fanotify_event_has_object_fh(event)) { + fh_len = fanotify_event_object_fh_len(event); + event_len += fanotify_fid_info_len(fh_len, dot_len); + } - return info_len; + return event_len; } /* @@ -181,7 +211,7 @@ static void fanotify_unhash_event(struct fsnotify_group *group, static struct fanotify_event *get_one_event(struct fsnotify_group *group, size_t count) { - size_t event_size = FAN_EVENT_METADATA_LEN; + size_t event_size; struct fanotify_event *event = NULL; struct fsnotify_event *fsn_event; unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); @@ -194,8 +224,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group, goto out; event = FANOTIFY_E(fsn_event); - if (info_mode) - event_size += fanotify_event_info_len(info_mode, event); + event_size = fanotify_event_len(info_mode, event); if (event_size > count) { event = ERR_PTR(-EINVAL); @@ -216,7 +245,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group, return event; } -static int create_fd(struct fsnotify_group *group, struct path *path, +static int create_fd(struct fsnotify_group *group, const struct path *path, struct file **file) { int client_fd; @@ -231,7 +260,7 @@ static int create_fd(struct fsnotify_group *group, struct path *path, * originally opened O_WRONLY. 
*/ new_file = dentry_open(path, - group->fanotify_data.f_flags | FMODE_NONOTIFY, + group->fanotify_data.f_flags | __FMODE_NONOTIFY, current_cred()); if (IS_ERR(new_file)) { /* @@ -316,6 +345,27 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } +static size_t copy_error_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_event_info_error info = { }; + struct fanotify_error_event *fee = FANOTIFY_EE(event); + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR; + info.hdr.len = FANOTIFY_ERROR_INFO_LEN; + + if (WARN_ON(count < info.hdr.len)) + return -EFAULT; + + info.error = fee->error; + info.error_count = fee->err_count; + + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + return info.hdr.len; +} + static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, int info_type, const char *name, size_t name_len, @@ -331,9 +381,6 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", __func__, fh_len, name_len, info_len, count); - if (!fh_len) - return 0; - if (WARN_ON_ONCE(len < sizeof(info) || len > count)) return -EFAULT; @@ -348,6 +395,8 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, return -EFAULT; break; case FAN_EVENT_INFO_TYPE_DFID_NAME: + case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME: + case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME: if (WARN_ON_ONCE(!name || !name_len)) return -EFAULT; break; @@ -368,6 +417,11 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, handle.handle_type = fh->type; handle.handle_bytes = fh_len; + + /* Mangle handle_type for bad file_handle */ + if (!fh_len) + handle.handle_type = FILEID_INVALID; + if (copy_to_user(buf, &handle, sizeof(handle))) return -EFAULT; @@ -442,11 +496,19 @@ static int copy_info_records_to_user(struct fanotify_event *event, unsigned int pidfd_mode = 
info_mode & FAN_REPORT_PIDFD; /* - * Event info records order is as follows: dir fid + name, child fid. + * Event info records order is as follows: + * 1. dir fid + name + * 2. (optional) new dir fid + new name + * 3. (optional) child fid */ - if (fanotify_event_dir_fh_len(event)) { + if (fanotify_event_has_dir_fh(event)) { info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : FAN_EVENT_INFO_TYPE_DFID; + + /* FAN_RENAME uses special info types */ + if (event->mask & FAN_RENAME) + info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME; + ret = copy_fid_info_to_user(fanotify_event_fsid(event), fanotify_info_dir_fh(info), info_type, @@ -460,7 +522,23 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } - if (fanotify_event_object_fh_len(event)) { + /* New dir fid+name may be reported in addition to old dir fid+name */ + if (fanotify_event_has_dir2_fh(event)) { + info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME; + ret = copy_fid_info_to_user(fanotify_event_fsid(event), + fanotify_info_dir2_fh(info), + info_type, + fanotify_info_name2(info), + info->name2_len, buf, count); + if (ret < 0) + return ret; + + buf += ret; + count -= ret; + total_bytes += ret; + } + + if (fanotify_event_has_object_fh(event)) { const char *dot = NULL; int dot_len = 0; @@ -520,6 +598,15 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_is_error_event(event->mask)) { + ret = copy_error_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } @@ -528,7 +615,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, char __user *buf, size_t count) { struct fanotify_event_metadata metadata; - struct path *path = fanotify_event_path(event); + const struct path *path = fanotify_event_path(event); struct fanotify_info *info = fanotify_event_info(event); unsigned int info_mode = FAN_GROUP_FLAG(group, 
FANOTIFY_INFO_MODES); unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; @@ -537,8 +624,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - metadata.event_len = FAN_EVENT_METADATA_LEN + - fanotify_event_info_len(info_mode, event); + metadata.event_len = fanotify_event_len(info_mode, event); metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; metadata.reserved = 0; @@ -913,27 +999,28 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask, unsigned int flags, __u32 umask, int *destroy) { - __u32 oldmask = 0; + __u32 oldmask, newmask; /* umask bits cannot be removed by user */ mask &= ~umask; spin_lock(&fsn_mark->lock); - if (!(flags & FAN_MARK_IGNORED_MASK)) { - oldmask = fsn_mark->mask; + oldmask = fsnotify_calc_mask(fsn_mark); + if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) { fsn_mark->mask &= ~mask; } else { - fsn_mark->ignored_mask &= ~mask; + fsn_mark->ignore_mask &= ~mask; } + newmask = fsnotify_calc_mask(fsn_mark); /* * We need to keep the mark around even if remaining mask cannot * result in any events (e.g. mask == FAN_ONDIR) to support incremenal * changes to the mask. * Destroy mark when only umask bits remain. 
*/ - *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask); + *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask); spin_unlock(&fsn_mark->lock); - return mask & oldmask; + return oldmask & ~newmask; } static int fanotify_remove_mark(struct fsnotify_group *group, @@ -944,10 +1031,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group, __u32 removed; int destroy_mark; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return -ENOENT; } @@ -957,7 +1044,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group, fsnotify_recalc_mask(fsn_mark->connector); if (destroy_mark) fsnotify_detach_mark(fsn_mark); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); if (destroy_mark) fsnotify_free_mark(fsn_mark); @@ -990,29 +1077,72 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, flags, umask); } -static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, - __u32 mask, - unsigned int flags) +static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, + unsigned int fan_flags) { - __u32 oldmask = -1; + bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE); + unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS; + bool recalc = false; + + /* + * When using FAN_MARK_IGNORE for the first time, mark starts using + * independent event flags in ignore mask. After that, trying to + * update the ignore mask with the old FAN_MARK_IGNORED_MASK API + * will result in EEXIST error. + */ + if (ignore == FAN_MARK_IGNORE) + fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS; + + /* + * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to + * the removal of the FS_MODIFY bit in calculated mask if it was set + * because of an ignore mask that is now going to survive FS_MODIFY. 
+ */ + if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && + !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) { + fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; + if (!(fsn_mark->mask & FS_MODIFY)) + recalc = true; + } + + if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE || + want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) + return recalc; + + /* + * NO_IREF may be removed from a mark, but not added. + * When removed, fsnotify_recalc_mask() will take the inode ref. + */ + WARN_ON_ONCE(!want_iref); + fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF; + + return true; +} + +static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, + __u32 mask, unsigned int fan_flags) +{ + bool recalc; spin_lock(&fsn_mark->lock); - if (!(flags & FAN_MARK_IGNORED_MASK)) { - oldmask = fsn_mark->mask; + if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS)) fsn_mark->mask |= mask; - } else { - fsn_mark->ignored_mask |= mask; - if (flags & FAN_MARK_IGNORED_SURV_MODIFY) - fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; - } + else + fsn_mark->ignore_mask |= mask; + + recalc = fsnotify_calc_mask(fsn_mark) & + ~fsnotify_conn_mask(fsn_mark->connector); + + recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags); spin_unlock(&fsn_mark->lock); - return mask & ~oldmask; + return recalc; } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, - unsigned int type, + unsigned int obj_type, + unsigned int fan_flags, __kernel_fsid_t *fsid) { struct ucounts *ucounts = group->fanotify_data.ucounts; @@ -1035,7 +1165,10 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, } fsnotify_init_mark(mark, group); - ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); + if (fan_flags & FAN_MARK_EVICTABLE) + mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF; + + ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid); if (ret) { fsnotify_put_mark(mark); goto 
out_dec_ucounts; @@ -1049,31 +1182,94 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, return ERR_PTR(ret); } +static int fanotify_group_init_error_pool(struct fsnotify_group *group) +{ + if (mempool_initialized(&group->fanotify_data.error_events_pool)) + return 0; + + return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool, + FANOTIFY_DEFAULT_FEE_POOL_SIZE, + sizeof(struct fanotify_error_event)); +} + +static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, + unsigned int fan_flags) +{ + /* + * Non evictable mark cannot be downgraded to evictable mark. + */ + if (fan_flags & FAN_MARK_EVICTABLE && + !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) + return -EEXIST; + + /* + * New ignore mask semantics cannot be downgraded to old semantics. + */ + if (fan_flags & FAN_MARK_IGNORED_MASK && + fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) + return -EEXIST; + + /* + * An ignore mask that survives modify could never be downgraded to not + * survive modify. With new FAN_MARK_IGNORE semantics we make that rule + * explicit and return an error when trying to update the ignore mask + * without the original FAN_MARK_IGNORED_SURV_MODIFY value. 
+ */ + if (fan_flags & FAN_MARK_IGNORE && + !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && + fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) + return -EEXIST; + + return 0; +} static int fanotify_add_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, unsigned int type, - __u32 mask, unsigned int flags, + fsnotify_connp_t *connp, unsigned int obj_type, + __u32 mask, unsigned int fan_flags, __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; - __u32 added; + bool recalc; + int ret = 0; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); + fsn_mark = fanotify_add_new_mark(group, connp, obj_type, + fan_flags, fsid); if (IS_ERR(fsn_mark)) { - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return PTR_ERR(fsn_mark); } } - added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - if (added & ~fsnotify_conn_mask(fsn_mark->connector)) + + /* + * Check if requested mark flags conflict with an existing mark flags. + */ + ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags); + if (ret) + goto out; + + /* + * Error events are pre-allocated per group, only if strictly + * needed (i.e. FAN_FS_ERROR was requested). 
+ */ + if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) && + (mask & FAN_FS_ERROR)) { + ret = fanotify_group_init_error_pool(group); + if (ret) + goto out; + } + + recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags); + if (recalc) fsnotify_recalc_mask(fsn_mark->connector); - mutex_unlock(&group->mark_mutex); + +out: + fsnotify_group_unlock(group); fsnotify_put_mark(fsn_mark); - return 0; + return ret; } static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, @@ -1100,10 +1296,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, /* * If some other task has this inode open for write we should not add - * an ignored mark, unless that ignored mark is supposed to survive + * an ignore mask, unless that ignore mask is supposed to survive * modification changes anyway. */ - if ((flags & FAN_MARK_IGNORED_MASK) && + if ((flags & FANOTIFY_MARK_IGNORE_BITS) && !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && inode_is_open_for_write(inode)) return 0; @@ -1207,14 +1403,24 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) return -EINVAL; - f_flags = O_RDWR | FMODE_NONOTIFY; + /* + * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID + * and is used as an indication to report both dir and child fid on all + * dirent events. + */ + if ((fid_mode & FAN_REPORT_TARGET_FID) && + (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) + return -EINVAL; + + f_flags = O_RDWR | __FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) f_flags |= O_NONBLOCK; /* fsnotify_alloc_group takes a ref. 
Dropped in fanotify_release */ - group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops); + group = fsnotify_alloc_group(&fanotify_fsnotify_ops, + FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS); if (IS_ERR(group)) { return PTR_ERR(group); } @@ -1295,16 +1501,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) return fd; } -/* Check if filesystem can encode a unique fid */ -static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) +static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) { __kernel_fsid_t root_fsid; int err; /* - * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). + * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). */ - err = vfs_get_fsid(path->dentry, fsid); + err = vfs_get_fsid(dentry, fsid); if (err) return err; @@ -1312,10 +1517,10 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) return -ENODEV; /* - * Make sure path is not inside a filesystem subvolume (e.g. btrfs) + * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) * which uses a different fsid than sb root. */ - err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid); + err = vfs_get_fsid(dentry->d_sb->s_root, &root_fsid); if (err) return err; @@ -1323,6 +1528,12 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) root_fsid.val[1] != fsid->val[1]) return -EXDEV; + return 0; +} + +/* Check if filesystem can encode a unique fid */ +static int fanotify_test_fid(struct dentry *dentry) +{ /* * We need to make sure that the file system supports at least * encoding a file handle so user can use name_to_handle_at() to @@ -1330,17 +1541,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) * objects. However, name_to_handle_at() requires that the * filesystem also supports decoding file handles. 
*/ - if (!path->dentry->d_sb->s_export_op || - !path->dentry->d_sb->s_export_op->fh_to_dentry) + if (!dentry->d_sb->s_export_op || + !dentry->d_sb->s_export_op->fh_to_dentry) return -EOPNOTSUPP; return 0; } -static int fanotify_events_supported(struct path *path, __u64 mask, +static int fanotify_events_supported(struct fsnotify_group *group, + const struct path *path, __u64 mask, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ + bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || + (mask & FAN_RENAME) || + (flags & FAN_MARK_IGNORE); /* * Some filesystems such as 'proc' acquire unusual locks when opening @@ -1368,6 +1584,15 @@ static int fanotify_events_supported(struct path *path, __u64 mask, path->mnt->mnt_sb->s_flags & SB_NOUSER) return -EINVAL; + /* + * We shouldn't have allowed setting dirent events and the directory + * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, + * but because we always allowed it, error only when using new APIs. 
+ */ + if (strict_dir_events && mark_type == FAN_MARK_INODE && + !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + return -ENOTDIR; + return 0; } @@ -1382,7 +1607,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, __kernel_fsid_t __fsid, *fsid = NULL; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; - bool ignored = flags & FAN_MARK_IGNORED_MASK; + unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; + unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; unsigned int obj_type, fid_mode; u32 umask = 0; int ret; @@ -1411,7 +1637,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; } - switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { + switch (mark_cmd) { case FAN_MARK_ADD: case FAN_MARK_REMOVE: if (!mask) @@ -1431,9 +1657,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & ~valid_mask) return -EINVAL; - /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */ - if (ignored) + + /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */ + if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK)) + return -EINVAL; + + /* + * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with + * FAN_MARK_IGNORED_MASK. + */ + if (ignore == FAN_MARK_IGNORED_MASK) { mask &= ~FANOTIFY_EVENT_FLAGS; + umask = FANOTIFY_EVENT_FLAGS; + } f = fdget(fanotify_fd); if (unlikely(!f.file)) @@ -1465,19 +1701,39 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, group->priority == FS_PRIO_0) goto fput_and_out; + if (mask & FAN_FS_ERROR && + mark_type != FAN_MARK_FILESYSTEM) + goto fput_and_out; + + /* + * Evictable is only relevant for inode marks, because only inode object + * can be evicted on memory pressure. 
+ */ + if (flags & FAN_MARK_EVICTABLE && + mark_type != FAN_MARK_INODE) + goto fput_and_out; + /* - * Events with data type inode do not carry enough information to report - * event->fd, so we do not allow setting a mask for inode events unless - * group supports reporting fid. - * inode events are not supported on a mount mark, because they do not - * carry enough information (i.e. path) to be filtered by mount point. + * Events that do not carry enough information to report + * event->fd require a group that supports reporting fid. Those + * events are not supported on a mount mark, because they do not + * carry enough information (i.e. path) to be filtered by mount + * point. */ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - if (mask & FANOTIFY_INODE_EVENTS && + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) goto fput_and_out; - if (flags & FAN_MARK_FLUSH) { + /* + * FAN_RENAME uses special info type records to report the old and + * new parent+name. Reporting only old and new parent id is less + * useful and was not implemented. 
+ */ + if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) + goto fput_and_out; + + if (mark_cmd == FAN_MARK_FLUSH) { ret = 0; if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); @@ -1493,14 +1749,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; - if (flags & FAN_MARK_ADD) { - ret = fanotify_events_supported(&path, mask, flags); + if (mark_cmd == FAN_MARK_ADD) { + ret = fanotify_events_supported(group, &path, mask, flags); if (ret) goto path_put_and_out; } if (fid_mode) { - ret = fanotify_test_fid(&path, &__fsid); + ret = fanotify_test_fsid(path.dentry, &__fsid); + if (ret) + goto path_put_and_out; + + ret = fanotify_test_fid(path.dentry); if (ret) goto path_put_and_out; @@ -1513,6 +1773,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; + ret = mnt ? -EINVAL : -EISDIR; + /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ + if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE && + (mnt || S_ISDIR(inode->i_mode)) && + !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) + goto path_put_and_out; + /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; @@ -1522,12 +1789,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * events with parent/name info for non-directory. 
*/ if ((fid_mode & FAN_REPORT_DIR_FID) && - (flags & FAN_MARK_ADD) && !ignored) + (flags & FAN_MARK_ADD) && !ignore) mask |= FAN_EVENT_ON_CHILD; } /* create/update an inode mark */ - switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { + switch (mark_cmd) { case FAN_MARK_ADD: if (mark_type == FAN_MARK_MOUNT) ret = fanotify_add_vfsmount_mark(group, mnt, mask, @@ -1604,8 +1871,8 @@ static int __init fanotify_user_setup(void) FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 11); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 3451708fd0..55081ae3a6 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -14,6 +14,7 @@ #include #include "inotify/inotify.h" +#include "fanotify/fanotify.h" #include "fdinfo.h" #include "fsnotify.h" @@ -28,13 +29,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f, struct fsnotify_group *group = f->private_data; struct fsnotify_mark *mark; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); list_for_each_entry(mark, &group->marks_list, g_list) { show(m, mark); if (seq_has_overflowed(m)) break; } - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); } #if defined(CONFIG_EXPORTFS) @@ -103,19 +104,16 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f) static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) { - unsigned int mflags = 0; + unsigned int mflags = fanotify_mark_user_flags(mark); struct inode *inode; - if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) - mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = igrab(fsnotify_conn_inode(mark->connector)); if 
(!inode) return; seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", inode->i_ino, inode->i_sb->s_dev, - mflags, mark->mask, mark->ignored_mask); + mflags, mark->mask, mark->ignore_mask); show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); @@ -123,12 +121,12 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct mount *mnt = fsnotify_conn_mount(mark->connector); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", - mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); + mnt->mnt_id, mflags, mark->mask, mark->ignore_mask); } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) { struct super_block *sb = fsnotify_conn_sb(mark->connector); seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", - sb->s_dev, mflags, mark->mask, mark->ignored_mask); + sb->s_dev, mflags, mark->mask, mark->ignore_mask); } } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 963e6ce75b..7974e91ffe 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -70,8 +70,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); - if (iput_inode) - iput(iput_inode); + iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify_inode(inode, FS_UNMOUNT); @@ -85,8 +84,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb) } spin_unlock(&sb->s_inode_list_lock); - if (iput_inode) - iput(iput_inode); + iput(iput_inode); } void fsnotify_sb_delete(struct super_block *sb) @@ -102,7 +100,7 @@ void fsnotify_sb_delete(struct super_block *sb) * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the - * parent cares. Thus when an event happens on a child it can quickly tell if + * parent cares. 
Thus when an event happens on a child it can quickly tell * if there is a need to find a parent and send the event to the parent. */ void __fsnotify_update_child_dentry_flags(struct inode *inode) @@ -252,7 +250,10 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group, if (WARN_ON_ONCE(!ops->handle_inode_event)) return 0; - if ((inode_mark->mask & FS_EXCL_UNLINK) && + if (WARN_ON_ONCE(!inode && !dir)) + return 0; + + if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; @@ -276,23 +277,28 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; - if (parent_mark) { - /* - * parent_mark indicates that the parent inode is watching - * children and interested in this event, which is an event - * possible on child. But is *this mark* watching children and - * interested in this event? - */ - if (parent_mark->mask & FS_EVENT_ON_CHILD) { - ret = fsnotify_handle_inode_event(group, parent_mark, mask, - data, data_type, dir, name, 0); - if (ret) - return ret; - } - if (!inode_mark) + /* + * For FS_RENAME, 'dir' is old dir and 'data' is new dentry. + * The only ->handle_inode_event() backend that supports FS_RENAME is + * dnotify, where it means file was renamed within same parent. 
+ */ + if (mask & FS_RENAME) { + struct dentry *moved = fsnotify_data_dentry(data, data_type); + + if (dir != moved->d_parent->d_inode) return 0; } + if (parent_mark) { + ret = fsnotify_handle_inode_event(group, parent_mark, mask, + data, data_type, dir, name, 0); + if (ret) + return ret; + } + + if (!inode_mark) + return 0; + if (mask & FS_EVENT_ON_CHILD) { /* * Some events can be sent on both parent dir and child marks @@ -318,42 +324,36 @@ static int send_to_group(__u32 mask, const void *data, int data_type, struct fsnotify_group *group = NULL; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; - __u32 marks_ignored_mask = 0; + __u32 marks_ignore_mask = 0; + bool is_dir = mask & FS_ISDIR; struct fsnotify_mark *mark; int type; - if (WARN_ON(!iter_info->report_mask)) + if (!iter_info->report_mask) return 0; /* clear ignored on inode modification */ if (mask & FS_MODIFY) { - fsnotify_foreach_obj_type(type) { - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - mark = iter_info->marks[type]; - if (mark && - !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) - mark->ignored_mask = 0; + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { + if (!(mark->flags & + FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) + mark->ignore_mask = 0; } } - fsnotify_foreach_obj_type(type) { - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - mark = iter_info->marks[type]; - /* does the object mark tell us to do something? */ - if (mark) { - group = mark->group; - marks_mask |= mark->mask; - marks_ignored_mask |= mark->ignored_mask; - } + /* Are any of the group marks interested in this event? 
*/ + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { + group = mark->group; + marks_mask |= mark->mask; + marks_ignore_mask |= + fsnotify_effective_ignore_mask(mark, is_dir, type); } - pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", - __func__, group, mask, marks_mask, marks_ignored_mask, + pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", + __func__, group, mask, marks_mask, marks_ignore_mask, data, data_type, dir, cookie); - if (!(test_mask & marks_mask & ~marks_ignored_mask)) + if (!(test_mask & marks_mask & ~marks_ignore_mask)) return 0; if (group->ops->handle_event) { @@ -390,11 +390,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) /* * iter_info is a multi head priority queue of marks. - * Pick a subset of marks from queue heads, all with the - * same group and set the report_mask for selected subset. - * Returns the report_mask of the selected subset. + * Pick a subset of marks from queue heads, all with the same group + * and set the report_mask to a subset of the selected marks. + * Returns false if there are no more groups to iterate. 
*/ -static unsigned int fsnotify_iter_select_report_types( +static bool fsnotify_iter_select_report_types( struct fsnotify_iter_info *iter_info) { struct fsnotify_group *max_prio_group = NULL; @@ -402,7 +402,7 @@ static unsigned int fsnotify_iter_select_report_types( int type; /* Choose max prio group among groups of all queue heads */ - fsnotify_foreach_obj_type(type) { + fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && fsnotify_compare_groups(max_prio_group, mark->group) > 0) @@ -410,30 +410,49 @@ static unsigned int fsnotify_iter_select_report_types( } if (!max_prio_group) - return 0; + return false; /* Set the report mask for marks from same group as max prio group */ + iter_info->current_group = max_prio_group; iter_info->report_mask = 0; - fsnotify_foreach_obj_type(type) { + fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; - if (mark && - fsnotify_compare_groups(max_prio_group, mark->group) == 0) + if (mark && mark->group == iter_info->current_group) { + /* + * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode + * is watching children and interested in this event, + * which is an event possible on child. + * But is *this mark* watching children? + */ + if (type == FSNOTIFY_ITER_TYPE_PARENT && + !(mark->mask & FS_EVENT_ON_CHILD) && + !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD)) + continue; + fsnotify_iter_set_report_type(iter_info, type); + } } - return iter_info->report_mask; + return true; } /* - * Pop from iter_info multi head queue, the marks that were iterated in the + * Pop from iter_info multi head queue, the marks that belong to the group of * current iteration step. 
*/ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *mark; int type; - fsnotify_foreach_obj_type(type) { - if (fsnotify_iter_should_report_type(iter_info, type)) + /* + * We cannot use fsnotify_foreach_iter_mark_type() here because we + * may need to advance a mark of type X that belongs to current_group + * but was not selected for reporting. + */ + fsnotify_foreach_iter_type(type) { + mark = iter_info->marks[type]; + if (mark && mark->group == iter_info->current_group) iter_info->marks[type] = fsnotify_next_mark(iter_info->marks[type]); } @@ -455,18 +474,20 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) * @file_name is relative to * @file_name: optional file name associated with event * @inode: optional inode associated with event - - * either @dir or @inode must be non-NULL. - * if both are non-NULL event may be reported to both. + * If @dir and @inode are both non-NULL, event may be + * reported to both. * @cookie: inotify rename cookie */ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, struct inode *inode, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); + struct super_block *sb = fsnotify_data_sb(data, data_type); struct fsnotify_iter_info iter_info = {}; - struct super_block *sb; struct mount *mnt = NULL; - struct inode *parent = NULL; + struct inode *inode2 = NULL; + struct dentry *moved; + int inode2_type; int ret = 0; __u32 test_mask, marks_mask; @@ -476,14 +497,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if (!inode) { /* Dirent event - report on TYPE_INODE to dir */ inode = dir; + /* For FS_RENAME, inode is old_dir and inode2 is new_dir */ + if (mask & FS_RENAME) { + moved = fsnotify_data_dentry(data, data_type); + inode2 = moved->d_parent->d_inode; + inode2_type = FSNOTIFY_ITER_TYPE_INODE2; + } } else if (mask & FS_EVENT_ON_CHILD) { /* * Event on child - report 
on TYPE_PARENT to dir if it is * watching children and on TYPE_INODE to child. */ - parent = dir; + inode2 = dir; + inode2_type = FSNOTIFY_ITER_TYPE_PARENT; } - sb = inode->i_sb; /* * Optimization: srcu_read_lock() has a memory barrier which can @@ -495,7 +522,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if (!sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!parent || !parent->i_fsnotify_marks)) + (!inode2 || !inode2->i_fsnotify_marks)) return 0; marks_mask = sb->s_fsnotify_mask; @@ -503,33 +530,35 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, marks_mask |= mnt->mnt_fsnotify_mask; if (inode) marks_mask |= inode->i_fsnotify_mask; - if (parent) - marks_mask |= parent->i_fsnotify_mask; + if (inode2) + marks_mask |= inode2->i_fsnotify_mask; /* - * if this is a modify event we may need to clear the ignored masks - * otherwise return if none of the marks care about this type of event. + * If this is a modify event we may need to clear some ignore masks. + * In that case, the object with ignore masks will have the FS_MODIFY + * event in its mask. + * Otherwise, return if none of the marks care about this type of event. 
*/ test_mask = (mask & ALL_FSNOTIFY_EVENTS); - if (!(mask & FS_MODIFY) && !(test_mask & marks_mask)) + if (!(test_mask & marks_mask)) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = + iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = fsnotify_first_mark(&sb->s_fsnotify_marks); if (mnt) { - iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = + iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } if (inode) { - iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = + iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } - if (parent) { - iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] = - fsnotify_first_mark(&parent->i_fsnotify_marks); + if (inode2) { + iter_info.marks[inode2_type] = + fsnotify_first_mark(&inode2->i_fsnotify_marks); } /* @@ -558,7 +587,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 87d8a50ee8..fde74eb333 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -76,10 +76,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) */ extern void __fsnotify_update_child_dentry_flags(struct inode *inode); -/* allocate and destroy and event holder to attach events to notification/access queues */ -extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void); -extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder); - extern struct kmem_cache *fsnotify_mark_connector_cachep; #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/group.c b/fs/notify/group.c index fb89c35129..1de6631a39 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -58,7 +58,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) fsnotify_group_stop_queueing(group); /* Clear all 
marks for this group and queue them for destruction */ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES_MASK); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_ANY); /* * Some marks can still be pinned when waiting for response from @@ -88,7 +88,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) * that deliberately ignores overflow events. */ if (group->overflow_event) - group->ops->free_event(group->overflow_event); + group->ops->free_event(group, group->overflow_event); fsnotify_put_group(group); } @@ -112,8 +112,10 @@ void fsnotify_put_group(struct fsnotify_group *group) EXPORT_SYMBOL_GPL(fsnotify_put_group); static struct fsnotify_group *__fsnotify_alloc_group( - const struct fsnotify_ops *ops, gfp_t gfp) + const struct fsnotify_ops *ops, + int flags, gfp_t gfp) { + static struct lock_class_key nofs_marks_lock; struct fsnotify_group *group; group = kzalloc(sizeof(struct fsnotify_group), gfp); @@ -133,6 +135,17 @@ static struct fsnotify_group *__fsnotify_alloc_group( INIT_LIST_HEAD(&group->marks_list); group->ops = ops; + group->flags = flags; + /* + * For most backends, eviction of inode with a mark is not expected, + * because marks hold a refcount on the inode against eviction. + * + * Use a different lockdep class for groups that support evictable + * inode marks, because with evictable marks, mark_mutex is NOT + * fs-reclaim safe - the mutex is taken when evicting inodes. + */ + if (flags & FSNOTIFY_GROUP_NOFS) + lockdep_set_class(&group->mark_mutex, &nofs_marks_lock); return group; } @@ -140,20 +153,15 @@ static struct fsnotify_group *__fsnotify_alloc_group( /* * Create a new fsnotify_group and hold a reference for the group returned. 
*/ -struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) +struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops, + int flags) { - return __fsnotify_alloc_group(ops, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(fsnotify_alloc_group); + gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT : + GFP_KERNEL; -/* - * Create a new fsnotify_group and hold a reference for the group returned. - */ -struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops) -{ - return __fsnotify_alloc_group(ops, GFP_KERNEL_ACCOUNT); + return __fsnotify_alloc_group(ops, flags, gfp); } -EXPORT_SYMBOL_GPL(fsnotify_alloc_user_group); +EXPORT_SYMBOL_GPL(fsnotify_alloc_group); int fsnotify_fasync(int fd, struct file *file, int on) { diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 8f00151eb7..7d5df7a215 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,11 +27,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is * used only internally to the kernel. 
*/ -#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK) +#define INOTIFY_USER_MASK (IN_ALL_EVENTS) static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark) { - return fsn_mark->mask & INOTIFY_USER_MASK; + __u32 mask = fsn_mark->mask & INOTIFY_USER_MASK; + + if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) + mask |= IN_EXCL_UNLINK; + if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) + mask |= IN_ONESHOT; + + return mask; } extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index b0530f75b2..993375f0db 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -123,13 +123,13 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, if (len) strcpy(event->name, name->name); - ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL); + ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. 
*/ fsnotify_destroy_event(group, fsn_event); } - if (inode_mark->mask & IN_ONESHOT) + if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) fsnotify_destroy_mark(inode_mark, group); return 0; @@ -184,7 +184,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group) dec_inotify_instances(group->inotify_data.ucounts); } -static void inotify_free_event(struct fsnotify_event *fsn_event) +static void inotify_free_event(struct fsnotify_group *group, + struct fsnotify_event *fsn_event) { kfree(INOTIFY_E(fsn_event)); } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 9fb7701d2f..131938986e 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -94,10 +94,10 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) __u32 mask; /* - * Everything should accept their own ignored and should receive events - * when the inode is unmounted. All directories care about children. + * Everything should receive events when the inode is unmounted. + * All directories care about children. 
*/ - mask = (FS_IN_IGNORED | FS_UNMOUNT); + mask = (FS_UNMOUNT); if (S_ISDIR(inode->i_mode)) mask |= FS_EVENT_ON_CHILD; @@ -107,13 +107,28 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) return mask; } +#define INOTIFY_MARK_FLAGS \ + (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT) + +static inline unsigned int inotify_arg_to_flags(u32 arg) +{ + unsigned int flags = 0; + + if (arg & IN_EXCL_UNLINK) + flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK; + if (arg & IN_ONESHOT) + flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT; + + return flags; +} + static inline u32 inotify_mask_to_arg(__u32 mask) { return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | IN_Q_OVERFLOW); } -/* intofiy userspace file descriptor functions */ +/* inotify userspace file descriptor functions */ static __poll_t inotify_poll(struct file *file, poll_table *wait) { struct fsnotify_group *group = file->private_data; @@ -518,13 +533,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; __u32 old_mask, new_mask; - __u32 mask; - int add = (arg & IN_MASK_ADD); + int replace = !(arg & IN_MASK_ADD); int create = (arg & IN_MASK_CREATE); int ret; - mask = inotify_arg_to_mask(inode, arg); - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; @@ -537,10 +549,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, spin_lock(&fsn_mark->lock); old_mask = fsn_mark->mask; - if (add) - fsn_mark->mask |= mask; - else - fsn_mark->mask = mask; + if (replace) { + fsn_mark->mask = 0; + fsn_mark->flags &= ~INOTIFY_MARK_FLAGS; + } + fsn_mark->mask |= inotify_arg_to_mask(inode, arg); + fsn_mark->flags |= inotify_arg_to_flags(arg); new_mask = fsn_mark->mask; spin_unlock(&fsn_mark->lock); @@ -571,19 +585,17 @@ static int inotify_new_watch(struct fsnotify_group *group, u32 arg) { struct inotify_inode_mark *tmp_i_mark; - __u32 mask; int 
ret; struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; - mask = inotify_arg_to_mask(inode, arg); - tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) return -ENOMEM; fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); - tmp_i_mark->fsn_mark.mask = mask; + tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg); + tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg); tmp_i_mark->wd = -1; ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); @@ -620,13 +632,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod { int ret = 0; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); /* try to update and existing watch with the new arg */ ret = inotify_update_existing_watch(group, inode, arg); /* no mark present, try to add a new one */ if (ret == -ENOENT) ret = inotify_new_watch(group, inode, arg); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return ret; } @@ -636,7 +648,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) struct fsnotify_group *group; struct inotify_event_info *oevent; - group = fsnotify_alloc_user_group(&inotify_fsnotify_ops); + group = fsnotify_alloc_group(&inotify_fsnotify_ops, + FSNOTIFY_GROUP_USER); if (IS_ERR(group)) return group; @@ -837,9 +850,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); - BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); - BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index bea106fac0..c74ef94744 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -116,20 +116,64 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) return *fsnotify_conn_mask_p(conn); } -static void 
__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +static void fsnotify_get_inode_ref(struct inode *inode) +{ + ihold(inode); + atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); +} + +/* + * Grab or drop inode reference for the connector if needed. + * + * When it's time to drop the reference, we only clear the HAS_IREF flag and + * return the inode object. fsnotify_drop_object() will be resonsible for doing + * iput() outside of spinlocks. This happens when last mark that wanted iref is + * detached. + */ +static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn, + bool want_iref) +{ + bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF; + struct inode *inode = NULL; + + if (conn->type != FSNOTIFY_OBJ_TYPE_INODE || + want_iref == has_iref) + return NULL; + + if (want_iref) { + /* Pin inode if any mark wants inode refcount held */ + fsnotify_get_inode_ref(fsnotify_conn_inode(conn)); + conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF; + } else { + /* Unpin inode after detach of last mark that wanted iref */ + inode = fsnotify_conn_inode(conn); + conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF; + } + + return inode; +} + +static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; + bool want_iref = false; struct fsnotify_mark *mark; assert_spin_locked(&conn->lock); /* We can get detached connector here when inode is getting unlinked. 
*/ if (!fsnotify_valid_obj_type(conn->type)) - return; + return NULL; hlist_for_each_entry(mark, &conn->list, obj_list) { - if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) - new_mask |= mark->mask; + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) + continue; + new_mask |= fsnotify_calc_mask(mark); + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE && + !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) + want_iref = true; } *fsnotify_conn_mask_p(conn) = new_mask; + + return fsnotify_update_iref(conn, want_iref); } /* @@ -169,12 +213,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static void fsnotify_get_inode_ref(struct inode *inode) -{ - ihold(inode); - atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); -} - static void fsnotify_put_inode_ref(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -213,6 +251,10 @@ static void *fsnotify_detach_connector_from_object( if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + + /* Unpin inode when detaching from connector */ + if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF)) + inode = NULL; } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { @@ -274,7 +316,8 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { - __fsnotify_recalc_mask(conn); + objp = __fsnotify_recalc_mask(conn); + type = conn->type; } WRITE_ONCE(mark->connector, NULL); spin_unlock(&conn->lock); @@ -353,7 +396,7 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) { int type; - fsnotify_foreach_obj_type(type) { + fsnotify_foreach_iter_type(type) { /* This can fail if mark is being removed */ if (!fsnotify_get_mark_safe(iter_info->marks[type])) { __release(&fsnotify_mark_srcu); @@ -382,7 +425,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info 
*iter_info) int type; iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - fsnotify_foreach_obj_type(type) + fsnotify_foreach_iter_type(type) fsnotify_put_mark_wake(iter_info->marks[type]); } @@ -398,9 +441,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) */ void fsnotify_detach_mark(struct fsnotify_mark *mark) { - struct fsnotify_group *group = mark->group; - - WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + fsnotify_group_assert_locked(mark->group); WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && refcount_read(&mark->refcnt) < 1 + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); @@ -452,9 +493,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); fsnotify_free_mark(mark); } EXPORT_SYMBOL_GPL(fsnotify_destroy_mark); @@ -496,10 +537,9 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) } static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int type, + unsigned int obj_type, __kernel_fsid_t *fsid) { - struct inode *inode = NULL; struct fsnotify_mark_connector *conn; conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); @@ -507,7 +547,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, return -ENOMEM; spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); - conn->type = type; + conn->flags = 0; + conn->type = obj_type; conn->obj = connp; /* Cache fsid of filesystem containing the object */ if (fsid) { @@ -517,10 +558,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->fsid.val[0] = conn->fsid.val[1] = 0; conn->flags = 0; } - if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { - inode = fsnotify_conn_inode(conn); - fsnotify_get_inode_ref(inode); - } 
fsnotify_get_sb_connectors(conn); /* @@ -529,8 +566,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - if (inode) - fsnotify_put_inode_ref(inode); fsnotify_put_sb_connectors(conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn); } @@ -572,15 +607,16 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector( * priority, highest number first, and then by the group's location in memory. */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, - int allow_dups, __kernel_fsid_t *fsid) + fsnotify_connp_t *connp, + unsigned int obj_type, + int add_flags, __kernel_fsid_t *fsid) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; int cmp; int err = 0; - if (WARN_ON(!fsnotify_valid_obj_type(type))) + if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) return -EINVAL; /* Backend is expected to check for zero fsid (e.g. tmpfs) */ @@ -592,7 +628,8 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, type, fsid); + err = fsnotify_attach_connector_to_object(connp, obj_type, + fsid); if (err) return err; goto restart; @@ -631,7 +668,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, if ((lmark->group == mark->group) && (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && - !allow_dups) { + !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) { err = -EEXIST; goto out_err; } @@ -665,13 +702,13 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, * event types should be delivered to which group. 
*/ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, - int allow_dups, __kernel_fsid_t *fsid) + fsnotify_connp_t *connp, unsigned int obj_type, + int add_flags, __kernel_fsid_t *fsid) { struct fsnotify_group *group = mark->group; int ret = 0; - BUG_ON(!mutex_is_locked(&group->mark_mutex)); + fsnotify_group_assert_locked(group); /* * LOCKING ORDER!!!! @@ -686,12 +723,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid); + ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid); if (ret) goto err; - if (mark->mask) - fsnotify_recalc_mask(mark->connector); + fsnotify_recalc_mask(mark->connector); return ret; err: @@ -706,14 +742,15 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups, __kernel_fsid_t *fsid) + unsigned int obj_type, int add_flags, + __kernel_fsid_t *fsid) { int ret; struct fsnotify_group *group = mark->group; - mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid); - mutex_unlock(&group->mark_mutex); + fsnotify_group_lock(group); + ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid); + fsnotify_group_unlock(group); return ret; } EXPORT_SYMBOL_GPL(fsnotify_add_mark); @@ -747,14 +784,14 @@ EXPORT_SYMBOL_GPL(fsnotify_find_mark); /* Clear any marks in a group with given type mask */ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, - unsigned int type_mask) + unsigned int obj_type) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); struct list_head *head = &to_free; /* Skip selection step if we want to clear all marks. 
*/ - if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) { + if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) { head = &group->marks_list; goto clear; } @@ -767,24 +804,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, * move marks to free to to_free list in one go and then free marks in * to_free list one by one. */ - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if ((1U << mark->connector->type) & type_mask) + if (mark->connector->type == obj_type) list_move(&mark->g_list, &to_free); } - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); clear: while (1) { - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); if (list_empty(head)) { - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); break; } mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); fsnotify_free_mark(mark); fsnotify_put_mark(mark); } diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 32f45543b9..9022ae650c 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -64,7 +64,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group, WARN_ON(!list_empty(&event->list)); spin_unlock(&group->notification_lock); } - group->ops->free_event(event); + group->ops->free_event(group, event); } /* @@ -78,12 +78,12 @@ void fsnotify_destroy_event(struct fsnotify_group *group, * 2 if the event was not queued - either the queue of events has overflown * or the group is shutting down. 
*/ -int fsnotify_add_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct fsnotify_group *, - struct fsnotify_event *), - void (*insert)(struct fsnotify_group *, - struct fsnotify_event *)) +int fsnotify_insert_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct fsnotify_group *, + struct fsnotify_event *), + void (*insert)(struct fsnotify_group *, + struct fsnotify_event *)) { int ret = 0; struct list_head *list = &group->notification_list; diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 72cdfa8727..98f57d0c70 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -475,6 +475,7 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) vbo = (u64)bit << index_bits; if (vbo >= i_size) { ntfs_inode_err(dir, "Looks like your dir is corrupt"); + ctx->pos = eod; err = -EINVAL; goto out; } diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 6ba1357f3e..369ab64a0b 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -1181,7 +1181,8 @@ static int read_log_page(struct ntfs_log *log, u32 vbo, static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, struct restart_info *info) { - u32 skip, vbo; + u32 skip; + u64 vbo; struct RESTART_HDR *r_page = NULL; /* Determine which restart area we are looking for. */ diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 76935562d5..a069ae7a74 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1538,6 +1538,11 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, goto out1; } + if (data_size <= le64_to_cpu(alloc->nres.data_size)) { + /* Reuse index. */ + goto out; + } + /* Increase allocation. 
*/ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, data_size, &data_size, true, @@ -1548,6 +1553,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, goto out1; } +out: *vbn = bit << indx->idx2vbn_bits; return 0; diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 0ff673bb4b..ff45ad967f 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -37,7 +37,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, bool is_dir; unsigned long ino = inode->i_ino; u32 rp_fa = 0, asize, t32; - u16 roff, rsize, names = 0; + u16 roff, rsize, names = 0, links = 0; const struct ATTR_FILE_NAME *fname = NULL; const struct INDEX_ROOT *root; struct REPARSE_DATA_BUFFER rp; // 0x18 bytes @@ -190,11 +190,12 @@ static struct inode *ntfs_read_mft(struct inode *inode, rsize < SIZEOF_ATTRIBUTE_FILENAME) goto out; + names += 1; fname = Add2Ptr(attr, roff); if (fname->type == FILE_NAME_DOS) goto next_attr; - names += 1; + links += 1; if (name && name->len == fname->name_len && !ntfs_cmp_names_cpu(name, (struct le_str *)&fname->name_len, NULL, false)) @@ -421,7 +422,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, ni->mi.dirty = true; } - set_nlink(inode, names); + set_nlink(inode, links); if (S_ISDIR(mode)) { ni->std_fa |= FILE_ATTRIBUTE_DIRECTORY; diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index ba26a465b3..324c0b036f 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -59,7 +59,7 @@ struct GUID { struct cpu_str { u8 len; u8 unused; - u16 name[10]; + u16 name[]; }; struct le_str { diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index ac43e4a6d5..383fc3437f 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -508,16 +508,9 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, if (aoff + asize > used) return false; - if (ni && is_attr_indexed(attr)) { + if (ni && is_attr_indexed(attr) && attr->type == ATTR_NAME) { u16 links = le16_to_cpu(ni->mi.mrec->hard_links); - struct ATTR_FILE_NAME 
*fname = - attr->type != ATTR_NAME ? - NULL : - resident_data_ex(attr, - SIZEOF_ATTRIBUTE_FILENAME); - if (fname && fname->type == FILE_NAME_DOS) { - /* Do not decrease links count deleting DOS name. */ - } else if (!links) { + if (!links) { /* minor error. Not critical. */ } else { ni->mi.mrec->hard_links = cpu_to_le16(links - 1); diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index fbaf1c8431..0a71075042 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1448,8 +1448,6 @@ static int __init init_ntfs_fs(void) { int err; - pr_info("ntfs3: Max link count %u\n", NTFS_LINK_MAX); - if (IS_ENABLED(CONFIG_NTFS3_FS_POSIX_ACL)) pr_info("ntfs3: Enabled Linux POSIX ACLs support\n"); if (IS_ENABLED(CONFIG_NTFS3_64BIT_CLUSTER)) diff --git a/fs/open.c b/fs/open.c index 159a2765b7..43e5ca4324 100644 --- a/fs/open.c +++ b/fs/open.c @@ -979,6 +979,48 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); +/** + * dentry_create - Create and open a file + * @path: path to create + * @flags: O_ flags + * @mode: mode bits for new file + * @cred: credentials to use + * + * Caller must hold the parent directory's lock, and have prepared + * a negative dentry, placed in @path->dentry, for the new file. + * + * Caller sets @path->mnt to the vfsmount of the filesystem where + * the new file is to be created. The parent directory and the + * negative dentry must reside on the same filesystem instance. + * + * On success, returns a "struct file *". Otherwise a ERR_PTR + * is returned. 
+ */ +struct file *dentry_create(const struct path *path, int flags, umode_t mode, + const struct cred *cred) +{ + struct file *f; + int error; + + validate_creds(cred); + f = alloc_empty_file(flags, cred); + if (IS_ERR(f)) + return f; + + error = vfs_create(mnt_user_ns(path->mnt), + d_inode(path->dentry->d_parent), + path->dentry, mode, true); + if (!error) + error = vfs_open(path, f); + + if (unlikely(error)) { + fput(f); + return ERR_PTR(error); + } + return f; +} +EXPORT_SYMBOL(dentry_create); + struct file *open_with_fake_path(const struct path *path, int flags, struct inode *inode, const struct cred *cred) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index f825176ff4..07a312bf9b 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -355,10 +355,10 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) return inode; } -static int openprom_remount(struct super_block *sb, int *flags, char *data) +static int openpromfs_reconfigure(struct fs_context *fc) { - sync_filesystem(sb); - *flags |= SB_NOATIME; + sync_filesystem(fc->root->d_sb); + fc->sb_flags |= SB_NOATIME; return 0; } @@ -366,7 +366,6 @@ static const struct super_operations openprom_sops = { .alloc_inode = openprom_alloc_inode, .free_inode = openprom_free_inode, .statfs = simple_statfs, - .remount_fs = openprom_remount, }; static int openprom_fill_super(struct super_block *s, struct fs_context *fc) @@ -416,6 +415,7 @@ static int openpromfs_get_tree(struct fs_context *fc) static const struct fs_context_operations openpromfs_context_ops = { .get_tree = openpromfs_get_tree, + .reconfigure = openpromfs_reconfigure, }; static int openpromfs_init_fs_context(struct fs_context *fc) diff --git a/fs/pipe.c b/fs/pipe.c index a8b8ef2dae..0a8095070b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -423,6 +423,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) bool was_empty = false; bool wake_next_writer = false; + /* + * Reject writing to watch queue pipes before the 
point where we lock + * the pipe. + * Otherwise, lockdep would be unhappy if the caller already has another + * pipe locked. + * If we had to support locking a normal pipe and a notification pipe at + * the same time, we could set up lockdep annotations for that, but + * since we don't actually need that, it's simpler to just bail here. + */ + if (pipe_has_watch_queue(pipe)) + return -EXDEV; + /* Null write succeeds. */ if (unlikely(total_len == 0)) return 0; @@ -435,11 +447,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) goto out; } - if (pipe_has_watch_queue(pipe)) { - ret = -EXDEV; - goto out; - } - /* * If it wasn't empty we try to merge new data into * the last buffer. diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 5d3f944f60..295040c249 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -973,6 +973,8 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n", kmsg_dump_reason_str(record->reason), record->count); + if (!buf) + return -ENOMEM; hlen = strlen(buf); record->buf = krealloc(buf, hlen + size, GFP_KERNEL); if (!record->buf) { diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d019d6ac6a..fd55a4a04d 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -440,6 +440,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); + else + kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 1e9c520411..5800cb065c 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -82,9 +82,6 @@ static inline sysv_zone_t *block_end(struct buffer_head *bh) return (sysv_zone_t*)((char*)bh->b_data + bh->b_size); } -/* - * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock) - */ static Indirect *get_branch(struct inode *inode, int depth, int offsets[], @@ -104,15 +101,18 @@ static Indirect 
*get_branch(struct inode *inode, bh = sb_bread(sb, block); if (!bh) goto failure; + read_lock(&pointers_lock); if (!verify_chain(chain, p)) goto changed; add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets); + read_unlock(&pointers_lock); if (!p->key) goto no_block; } return NULL; changed: + read_unlock(&pointers_lock); brelse(bh); *err = -EAGAIN; goto no_block; @@ -218,9 +218,7 @@ static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *b goto out; reread: - read_lock(&pointers_lock); partial = get_branch(inode, depth, offsets, chain, &err); - read_unlock(&pointers_lock); /* Simplest case - block found, no allocation needed */ if (!partial) { @@ -290,9 +288,9 @@ static Indirect *find_shared(struct inode *inode, *top = 0; for (k = depth; k > 1 && !offsets[k-1]; k--) ; + partial = get_branch(inode, k, offsets, chain, &err); write_lock(&pointers_lock); - partial = get_branch(inode, k, offsets, chain, &err); if (!partial) partial = chain + k-1; /* diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 7cc2abcb70..509144cc41 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -262,9 +262,6 @@ static int write_begin_slow(struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } if (PagePrivate(page)) @@ -463,9 +460,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return err; } } - - SetPageUptodate(page); - ClearPageError(page); } err = allocate_budget(c, page, ui, appending); @@ -475,10 +469,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * If we skipped reading the page because we were going to * write all of it, then it is not up to date. 
*/ - if (skipped_read) { + if (skipped_read) ClearPageChecked(page); - ClearPageUptodate(page); - } /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the @@ -569,6 +561,9 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, goto out; } + if (len == PAGE_SIZE) + SetPageUptodate(page); + if (!PagePrivate(page)) { attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 37dd3fe5b1..44725007cc 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) if (!sbi->nls) { vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); err = -EINVAL; - goto fail_free; + goto fail_destroy_idr; } } @@ -224,6 +224,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); if (sbi->nls) unload_nls(sbi->nls); +fail_destroy_idr: idr_destroy(&sbi->ino_idr); kfree(sbi); return err; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8471717c50..dd9ea351bc 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -81,8 +81,8 @@ #define RO_EXCEPTION_TABLE #endif -/* Align . to a 8 byte boundary equals to maximum function alignment. */ -#define ALIGN_FUNCTION() . = ALIGN(8) +/* Align . function alignment. */ +#define ALIGN_FUNCTION() . 
= ALIGN(CONFIG_FUNCTION_ALIGNMENT) /* * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 5190fd48d3..05cd782db9 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -240,9 +240,9 @@ int mipi_dsi_shutdown_peripheral(struct mipi_dsi_device *dsi); int mipi_dsi_turn_on_peripheral(struct mipi_dsi_device *dsi); int mipi_dsi_set_maximum_return_packet_size(struct mipi_dsi_device *dsi, u16 value); -ssize_t mipi_dsi_compression_mode(struct mipi_dsi_device *dsi, bool enable); -ssize_t mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, - const struct drm_dsc_picture_parameter_set *pps); +int mipi_dsi_compression_mode(struct mipi_dsi_device *dsi, bool enable); +int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, + const struct drm_dsc_picture_parameter_set *pps); ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 15a089a87c..f7ece14b10 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -444,7 +444,7 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, #define drm_dbg_core(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_CORE, fmt, ##__VA_ARGS__) -#define drm_dbg(drm, fmt, ...) \ +#define drm_dbg_driver(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) #define drm_dbg_kms(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_KMS, fmt, ##__VA_ARGS__) @@ -463,6 +463,7 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, #define drm_dbg_drmres(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRMRES, fmt, ##__VA_ARGS__) +#define drm_dbg(drm, fmt, ...) 
drm_dbg_driver(drm, fmt, ##__VA_ARGS__) /* * printk based logging diff --git a/include/linux/bpf.h b/include/linux/bpf.h index df15d4d445..4236de05a8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -321,7 +321,12 @@ enum bpf_type_flag { */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_RDONLY, + /* MEM was "allocated" from a different helper, and cannot be mixed + * with regular non-MEM_ALLOC'ed MEM types. + */ + MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_ALLOC, }; /* Max number of base types. */ @@ -405,7 +410,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. 
Its purpose is to ensure the enum is @@ -1646,17 +1651,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); struct btf *bpf_get_btf_vmlinux(void); /* Map specifics */ -struct xdp_buff; +struct xdp_frame; struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; void __dev_flush(void); -int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx); -int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, +int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); @@ -1665,7 +1670,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, bool exclude_ingress); void __cpu_map_flush(void); -int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, struct sk_buff *skb); @@ -1818,26 +1823,26 @@ static inline void __dev_flush(void) { } -struct xdp_buff; +struct xdp_frame; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; static inline -int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline -int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, 
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { return 0; @@ -1865,7 +1870,7 @@ static inline void __cpu_map_flush(void) } static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, - struct xdp_buff *xdp, + struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3d04b48e50..c0993b079a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -541,8 +541,8 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); -int check_ctx_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno); +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index caf3b95017..e1e6a045c3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,6 +74,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index 8904063d4c..65eec5be8c 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -129,6 +129,16 @@ void _dev_info(const struct device *dev, const char *fmt, ...) _dev_printk(level, dev, fmt, ##__VA_ARGS__); \ }) +/* + * Dummy dev_printk for disabled debugging statements to use whilst maintaining + * gcc's format checking. 
+ */ +#define dev_no_printk(level, dev, fmt, ...) \ + ({ \ + if (0) \ + _dev_printk(level, dev, fmt, ##__VA_ARGS__); \ + }) + /* * #defines for all the dev_ macros to prefix with whatever * possible use of #define dev_fmt(fmt) ... @@ -158,10 +168,7 @@ void _dev_info(const struct device *dev, const char *fmt, ...) dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #else #define dev_dbg(dev, fmt, ...) \ -({ \ - if (0) \ - dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \ -}) + dev_no_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #endif #ifdef CONFIG_PRINTK @@ -247,20 +254,14 @@ do { \ } while (0) #else #define dev_dbg_ratelimited(dev, fmt, ...) \ -do { \ - if (0) \ - dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \ -} while (0) + dev_no_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #endif #ifdef VERBOSE_DEBUG #define dev_vdbg dev_dbg #else #define dev_vdbg(dev, fmt, ...) \ -({ \ - if (0) \ - dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \ -}) + dev_no_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #endif /* diff --git a/include/linux/device.h b/include/linux/device.h index e270cb740b..3e04bd84f1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -971,6 +971,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); extern __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index bfffe49435..2ff55ec902 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -69,6 +69,7 @@ struct dma_map_ops { int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); size_t (*max_mapping_size)(struct device *dev); + size_t 
(*opt_mapping_size)(void); unsigned long (*get_merge_boundary)(struct device *dev); }; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index dca2b1355b..fe3849434b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, @@ -266,6 +267,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline size_t dma_opt_mapping_size(struct device *dev) +{ + return 0; +} static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index 0aad774bea..b87c3b85a1 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -26,7 +26,7 @@ struct dnotify_struct { FS_MODIFY | FS_MODIFY_CHILD |\ FS_ACCESS | FS_ACCESS_CHILD |\ FS_ATTRIB | FS_ATTRIB_CHILD |\ - FS_CREATE | FS_DN_RENAME |\ + FS_CREATE | FS_RENAME |\ FS_MOVED_FROM | FS_MOVED_TO) extern int dir_notify_enable; diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index ca0e26a858..c31dce9c71 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -542,6 +542,31 @@ static inline unsigned long compare_ether_header(const void *a, const void *b) #endif } +/** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device 
address, assign an appropriate packet type. + */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3260fe7148..218fc5c54e 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -221,8 +221,7 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ -#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do - asychronous blocking locks */ +#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ unsigned long flags; }; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index eec3b7c408..558844c8d2 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES) -#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) +#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) #define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) @@ -61,14 +61,19 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ FAN_MARK_FILESYSTEM) +#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ + FAN_MARK_FLUSH) + +#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \ + FAN_MARK_IGNORE) + #define FANOTIFY_MARK_FLAGS 
(FANOTIFY_MARK_TYPE_BITS | \ - FAN_MARK_ADD | \ - FAN_MARK_REMOVE | \ + FANOTIFY_MARK_CMD_BITS | \ + FANOTIFY_MARK_IGNORE_BITS | \ FAN_MARK_DONT_FOLLOW | \ FAN_MARK_ONLYDIR | \ - FAN_MARK_IGNORED_MASK | \ FAN_MARK_IGNORED_SURV_MODIFY | \ - FAN_MARK_FLUSH) + FAN_MARK_EVICTABLE) /* * Events that can be reported with data type FSNOTIFY_EVENT_PATH. @@ -82,15 +87,23 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. */ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ + FAN_RENAME) + +/* Events that can be reported with event->fd */ +#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF) +/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ +#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) + /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ - FANOTIFY_INODE_EVENTS) + FANOTIFY_INODE_EVENTS | \ + FANOTIFY_ERROR_EVENTS) /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ @@ -104,6 +117,10 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ FANOTIFY_PERM_EVENTS | \ FAN_Q_OVERFLOW | FAN_ONDIR) +/* Events and flags relevant only for directories */ +#define FANOTIFY_DIRONLY_EVENT_BITS (FANOTIFY_DIRENT_EVENTS | \ + FAN_EVENT_ON_CHILD | FAN_ONDIR) + #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ FANOTIFY_EVENT_FLAGS) diff --git a/include/linux/filter.h b/include/linux/filter.h index ddaeb2afc0..af0103bebb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1020,6 +1020,10 @@ int 
xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); +int xdp_do_redirect_frame(struct net_device *dev, + struct xdp_buff *xdp, + struct xdp_frame *xdpf, + struct bpf_prog *prog); void xdp_do_flush(void); /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h index 27cb706275..1c446c2ce2 100644 --- a/include/linux/fpga/fpga-region.h +++ b/include/linux/fpga/fpga-region.h @@ -7,6 +7,27 @@ #include #include +struct fpga_region; + +/** + * struct fpga_region_info - collection of parameters an FPGA Region + * @mgr: fpga region manager + * @compat_id: FPGA region id for compatibility check. + * @priv: fpga region private data + * @get_bridges: optional function to get bridges to a list + * + * fpga_region_info contains parameters for the register_full function. + * These are separated into an info structure because they some are optional + * others could be added to in the future. The info structure facilitates + * maintaining a stable API. + */ +struct fpga_region_info { + struct fpga_manager *mgr; + struct fpga_compat_id *compat_id; + void *priv; + int (*get_bridges)(struct fpga_region *region); +}; + /** * struct fpga_region - FPGA Region structure * @dev: FPGA Region device @@ -15,6 +36,7 @@ * @mgr: FPGA manager * @info: FPGA image info * @compat_id: FPGA region id for compatibility check. 
+ * @ops_owner: module containing the get_bridges function * @priv: private data * @get_bridges: optional function to get bridges to a list */ @@ -25,6 +47,7 @@ struct fpga_region { struct fpga_manager *mgr; struct fpga_image_info *info; struct fpga_compat_id *compat_id; + struct module *ops_owner; void *priv; int (*get_bridges)(struct fpga_region *region); }; @@ -37,15 +60,17 @@ struct fpga_region *fpga_region_class_find( int fpga_region_program_fpga(struct fpga_region *region); -struct fpga_region -*fpga_region_create(struct device *dev, struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)); -void fpga_region_free(struct fpga_region *region); -int fpga_region_register(struct fpga_region *region); -void fpga_region_unregister(struct fpga_region *region); +#define fpga_region_register_full(parent, info) \ + __fpga_region_register_full(parent, info, THIS_MODULE) +struct fpga_region * +__fpga_region_register_full(struct device *parent, const struct fpga_region_info *info, + struct module *owner); -struct fpga_region -*devm_fpga_region_create(struct device *dev, struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)); +#define fpga_region_register(parent, mgr, get_bridges) \ + __fpga_region_register(parent, mgr, get_bridges, THIS_MODULE) +struct fpga_region * +__fpga_region_register(struct device *parent, struct fpga_manager *mgr, + int (*get_bridges)(struct fpga_region *), struct module *owner); +void fpga_region_unregister(struct fpga_region *region); #endif /* _FPGA_REGION_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index f32723d937..61e86502fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1066,6 +1066,7 @@ struct file_lock_operations { }; struct lock_manager_operations { + void *lm_mod_owner; fl_owner_t (*lm_get_owner)(fl_owner_t); void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ @@ -1074,6 +1075,8 @@ struct lock_manager_operations { int (*lm_change)(struct 
file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); bool (*lm_breaker_owns_lease)(struct file_lock *); + bool (*lm_lock_expirable)(struct file_lock *cfl); + void (*lm_expire_lock)(void); }; struct lock_manager { @@ -1212,6 +1215,15 @@ extern void lease_unregister_notifier(struct notifier_block *); struct files_struct; extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); +extern bool locks_owner_has_blockers(struct file_lock_context *flctx, + fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1352,6 +1364,18 @@ static inline int lease_modify(struct file_lock *fl, int arg, struct files_struct; static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} +static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, + fl_owner_t owner) +{ + return false; +} + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) @@ -2745,6 +2769,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt, name, flags, mode); } extern struct file * dentry_open(const struct path *, int, const struct cred *); +extern struct file *dentry_create(const struct path *path, int flags, + umode_t mode, const struct cred *cred); extern struct file * open_with_fake_path(const struct path *, int, struct inode*, const struct cred *); static inline struct file *file_clone_open(struct file *file) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a9477c14fa..bb8467cd11 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -26,20 
+26,20 @@ * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only * the child is interested and not the parent. */ -static inline void fsnotify_name(struct inode *dir, __u32 mask, - struct inode *child, - const struct qstr *name, u32 cookie) +static inline int fsnotify_name(__u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *name, + u32 cookie) { if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) - return; + return 0; - fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie); + return fsnotify(mask, data, data_type, dir, name, NULL, cookie); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, __u32 mask) { - fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); + fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0); } static inline void fsnotify_inode(struct inode *inode, __u32 mask) @@ -86,7 +86,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, */ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { - fsnotify_parent(dentry, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE); + fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); } static inline int fsnotify_file(struct file *file, __u32 mask) @@ -144,18 +144,23 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; + __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; - if (old_dir == new_dir) - old_dir_mask |= FS_DN_RENAME; - if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; + rename_mask |= FS_ISDIR; } - fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie); - fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie); + /* Event with information about both old and new parent+name */ + fsnotify_name(rename_mask, moved, 
FSNOTIFY_EVENT_DENTRY, + old_dir, old_name, 0); + + fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, + old_dir, old_name, fs_cookie); + fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, + new_dir, new_name, fs_cookie); if (target) fsnotify_link_count(target); @@ -190,16 +195,22 @@ static inline void fsnotify_inoderemove(struct inode *inode) /* * fsnotify_create - 'name' was linked in + * + * Caller must make sure that dentry->d_name is stable. + * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate + * ->d_inode later */ -static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) +static inline void fsnotify_create(struct inode *dir, struct dentry *dentry) { - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); + audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_dirent(inode, dentry, FS_CREATE); + fsnotify_dirent(dir, dentry, FS_CREATE); } /* * fsnotify_link - new hardlink in 'inode' directory + * + * Caller must make sure that new_dentry->d_name is stable. 
* Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ @@ -209,7 +220,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); + fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE, + dir, &new_dentry->d_name, 0); } /* @@ -228,7 +240,8 @@ static inline void fsnotify_delete(struct inode *dir, struct inode *inode, if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - fsnotify_name(dir, mask, inode, &dentry->d_name, 0); + fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, + 0); } /** @@ -263,12 +276,16 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) /* * fsnotify_mkdir - directory 'name' was created + * + * Caller must make sure that dentry->d_name is stable. + * Note: some filesystems (e.g. 
kernfs) leave @dentry negative and instantiate + * ->d_inode later */ -static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) +static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) { - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); + audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); + fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR); } /* @@ -362,4 +379,17 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) fsnotify_dentry(dentry, mask); } +static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, + int error) +{ + struct fs_error_report report = { + .error = error, + .inode = inode, + .sb = sb, + }; + + return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, + NULL, NULL, NULL, 0); +} + #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1ce66748a2..d7d96c806b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -42,13 +44,18 @@ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FS_ERROR 0x00008000 /* Filesystem Error (fanotify) */ + +/* + * FS_IN_IGNORED overloads FS_ERROR. It is only used internally by inotify + * which does not support FS_ERROR. 
+ */ #define FS_IN_IGNORED 0x00008000 /* last inotify event here */ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ -#define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. @@ -56,10 +63,9 @@ */ #define FS_EVENT_ON_CHILD 0x08000000 -#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_RENAME 0x10000000 /* File was renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ -#define FS_IN_ONESHOT 0x80000000 /* only send event once */ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) @@ -69,7 +75,7 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. 
*/ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) @@ -94,12 +100,12 @@ /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ - FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ - FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) + FS_DELETE_SELF | FS_MOVE_SELF | \ + FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ + FS_ERROR) /* Extra flags that may be reported with event or control handling of events */ -#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ - FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) +#define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT) #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) @@ -136,6 +142,7 @@ struct mem_cgroup; * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to. + * Either @inode or @dir must be non-NULL. 
* @file_name: optional file name associated with event * @cookie: inotify rename cookie * @@ -155,7 +162,7 @@ struct fsnotify_ops { const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); - void (*free_event)(struct fsnotify_event *event); + void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event); /* called on final put+free to free memory */ void (*free_mark)(struct fsnotify_mark *mark); }; @@ -204,6 +211,12 @@ struct fsnotify_group { unsigned int priority; bool shutdown; /* group is being shut down, don't queue more events */ +#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ +#define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */ +#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */ + int flags; + unsigned int owner_flags; /* stored flags of mark_mutex owner */ + /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ atomic_t user_waits; /* Number of tasks waiting for user @@ -238,16 +251,50 @@ struct fsnotify_group { int flags; /* flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; + mempool_t error_events_pool; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; }; +/* + * These helpers are used to prevent deadlock when reclaiming inodes with + * evictable marks of the same group that is allocating a new mark. 
+ */ +static inline void fsnotify_group_lock(struct fsnotify_group *group) +{ + mutex_lock(&group->mark_mutex); + if (group->flags & FSNOTIFY_GROUP_NOFS) + group->owner_flags = memalloc_nofs_save(); +} + +static inline void fsnotify_group_unlock(struct fsnotify_group *group) +{ + if (group->flags & FSNOTIFY_GROUP_NOFS) + memalloc_nofs_restore(group->owner_flags); + mutex_unlock(&group->mark_mutex); +} + +static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) +{ + WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + if (group->flags & FSNOTIFY_GROUP_NOFS) + WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); +} + /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, + FSNOTIFY_EVENT_DENTRY, + FSNOTIFY_EVENT_ERROR, +}; + +struct fs_error_report { + int error; + struct inode *inode; + struct super_block *sb; }; static inline struct inode *fsnotify_data_inode(const void *data, int data_type) @@ -255,8 +302,25 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type) switch (data_type) { case FSNOTIFY_EVENT_INODE: return (struct inode *)data; + case FSNOTIFY_EVENT_DENTRY: + return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); + case FSNOTIFY_EVENT_ERROR: + return ((struct fs_error_report *)data)->inode; + default: + return NULL; + } +} + +static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_DENTRY: + /* Non const is needed for dget() */ + return (struct dentry *)data; + case FSNOTIFY_EVENT_PATH: + return ((const struct path *)data)->dentry; default: return NULL; } @@ -273,58 +337,110 @@ static inline const struct path *fsnotify_data_path(const void *data, } } +static inline struct super_block *fsnotify_data_sb(const void *data, + int data_type) +{ + switch (data_type) { + case 
FSNOTIFY_EVENT_INODE: + return ((struct inode *)data)->i_sb; + case FSNOTIFY_EVENT_DENTRY: + return ((struct dentry *)data)->d_sb; + case FSNOTIFY_EVENT_PATH: + return ((const struct path *)data)->dentry->d_sb; + case FSNOTIFY_EVENT_ERROR: + return ((struct fs_error_report *) data)->sb; + default: + return NULL; + } +} + +static inline struct fs_error_report *fsnotify_data_error_report( + const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_ERROR: + return (struct fs_error_report *) data; + default: + return NULL; + } +} + +/* + * Index to merged marks iterator array that correlates to a type of watch. + * The type of watched object can be deduced from the iterator type, but not + * the other way around, because an event can match different watched objects + * of the same object type. + * For example, both parent and child are watching an object of type inode. + */ +enum fsnotify_iter_type { + FSNOTIFY_ITER_TYPE_INODE, + FSNOTIFY_ITER_TYPE_VFSMOUNT, + FSNOTIFY_ITER_TYPE_SB, + FSNOTIFY_ITER_TYPE_PARENT, + FSNOTIFY_ITER_TYPE_INODE2, + FSNOTIFY_ITER_TYPE_COUNT +}; + +/* The type of object that a mark is attached to */ enum fsnotify_obj_type { + FSNOTIFY_OBJ_TYPE_ANY = -1, FSNOTIFY_OBJ_TYPE_INODE, - FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; -#define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) -#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) -#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) -#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) -#define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) - -static inline bool fsnotify_valid_obj_type(unsigned int type) +static inline bool fsnotify_valid_obj_type(unsigned int obj_type) { - return (type < FSNOTIFY_OBJ_TYPE_COUNT); + return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT); } struct 
fsnotify_iter_info { - struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT]; + struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; + struct fsnotify_group *current_group; unsigned int report_mask; int srcu_idx; }; static inline bool fsnotify_iter_should_report_type( - struct fsnotify_iter_info *iter_info, int type) + struct fsnotify_iter_info *iter_info, int iter_type) { - return (iter_info->report_mask & (1U << type)); + return (iter_info->report_mask & (1U << iter_type)); } static inline void fsnotify_iter_set_report_type( - struct fsnotify_iter_info *iter_info, int type) + struct fsnotify_iter_info *iter_info, int iter_type) +{ + iter_info->report_mask |= (1U << iter_type); +} + +static inline struct fsnotify_mark *fsnotify_iter_mark( + struct fsnotify_iter_info *iter_info, int iter_type) { - iter_info->report_mask |= (1U << type); + if (fsnotify_iter_should_report_type(iter_info, iter_type)) + return iter_info->marks[iter_type]; + return NULL; } -static inline void fsnotify_iter_set_report_type_mark( - struct fsnotify_iter_info *iter_info, int type, - struct fsnotify_mark *mark) +static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type, + struct fsnotify_mark **markp) { - iter_info->marks[type] = mark; - iter_info->report_mask |= (1U << type); + while (type < FSNOTIFY_ITER_TYPE_COUNT) { + *markp = fsnotify_iter_mark(iter, type); + if (*markp) + break; + type++; + } + return type; } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ - return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? 
\ - iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \ + return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \ } FSNOTIFY_ITER_FUNCS(inode, INODE) @@ -332,8 +448,13 @@ FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) -#define fsnotify_foreach_obj_type(type) \ - for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) +#define fsnotify_foreach_iter_type(type) \ + for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) +#define fsnotify_foreach_iter_mark_type(iter, mark, type) \ + for (type = 0; \ + type = fsnotify_iter_step(iter, type, &mark), \ + type < FSNOTIFY_ITER_TYPE_COUNT; \ + type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached @@ -352,6 +473,7 @@ struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ #define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 +#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { @@ -396,11 +518,18 @@ struct fsnotify_mark { struct hlist_node obj_list; /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; - /* Events types to ignore [mark->lock, group->mark_mutex] */ - __u32 ignored_mask; -#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 -#define FSNOTIFY_MARK_FLAG_ALIVE 0x02 -#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 + /* Events types and flags to ignore [mark->lock, group->mark_mutex] */ + __u32 ignore_mask; + /* General fsnotify mark flags */ +#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001 +#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002 + /* inotify mark flags */ +#define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010 +#define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020 + /* fanotify mark flags */ +#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 +#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 +#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 unsigned int flags; /* flags 
[mark->lock] */ }; @@ -466,8 +595,9 @@ static inline void fsnotify_update_flags(struct dentry *dentry) /* called from fsnotify listeners, such as fanotify or dnotify */ /* create a new group */ -extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); -extern struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops); +extern struct fsnotify_group *fsnotify_alloc_group( + const struct fsnotify_ops *ops, + int flags); /* get reference to a group */ extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ @@ -482,16 +612,30 @@ extern int fsnotify_fasync(int fd, struct file *file, int on); extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ -extern int fsnotify_add_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct fsnotify_group *, - struct fsnotify_event *), - void (*insert)(struct fsnotify_group *, - struct fsnotify_event *)); +extern int fsnotify_insert_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct fsnotify_group *, + struct fsnotify_event *), + void (*insert)(struct fsnotify_group *, + struct fsnotify_event *)); + +static inline int fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct fsnotify_group *, + struct fsnotify_event *)) +{ + return fsnotify_insert_event(group, event, merge, NULL); +} + /* Queue overflow event to a notification group */ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) { - fsnotify_add_event(group, group->overflow_event, NULL, NULL); + fsnotify_add_event(group, group->overflow_event, NULL); +} + +static inline bool fsnotify_is_overflow_event(u32 mask) +{ + return mask & FS_Q_OVERFLOW; } static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) @@ -512,6 
+656,101 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, /* functions used to manipulate the marks attached to inodes */ +/* + * Canonical "ignore mask" including event flags. + * + * Note the subtle semantic difference from the legacy ->ignored_mask. + * ->ignored_mask traditionally only meant which events should be ignored, + * while ->ignore_mask also includes flags regarding the type of objects on + * which events should be ignored. + */ +static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark) +{ + __u32 ignore_mask = mark->ignore_mask; + + /* The event flags in ignore mask take effect */ + if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) + return ignore_mask; + + /* + * Legacy behavior: + * - Always ignore events on dir + * - Ignore events on child if parent is watching children + */ + ignore_mask |= FS_ISDIR; + ignore_mask &= ~FS_EVENT_ON_CHILD; + ignore_mask |= mark->mask & FS_EVENT_ON_CHILD; + + return ignore_mask; +} + +/* Legacy ignored_mask - only event types to ignore */ +static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark) +{ + return mark->ignore_mask & ALL_FSNOTIFY_EVENTS; +} + +/* + * Check if mask (or ignore mask) should be applied depending if victim is a + * directory and whether it is reported to a watching parent. + */ +static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir, + int iter_type) +{ + /* Should mask be applied to a directory? */ + if (is_dir && !(mask & FS_ISDIR)) + return false; + + /* Should mask be applied to a child? */ + if (iter_type == FSNOTIFY_ITER_TYPE_PARENT && + !(mask & FS_EVENT_ON_CHILD)) + return false; + + return true; +} + +/* + * Effective ignore mask taking into account if event victim is a + * directory and whether it is reported to a watching parent. 
+ */ +static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark, + bool is_dir, int iter_type) +{ + __u32 ignore_mask = fsnotify_ignored_events(mark); + + if (!ignore_mask) + return 0; + + /* For non-dir and non-child, no need to consult the event flags */ + if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT) + return ignore_mask; + + ignore_mask = fsnotify_ignore_mask(mark); + if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type)) + return 0; + + return ignore_mask & ALL_FSNOTIFY_EVENTS; +} + +/* Get mask for calculating object interest taking ignore mask into account */ +static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) +{ + __u32 mask = mark->mask; + + if (!fsnotify_ignored_events(mark)) + return mask; + + /* Interest in FS_MODIFY may be needed for clearing ignore mask */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) + mask |= FS_MODIFY; + + /* + * If mark is interested in ignoring events on children, the object must + * show interest in those events for fsnotify_parent() to notice it. 
+ */ + return mask | mark->ignore_mask; +} + /* Get mask of events for a list of marks */ extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn); /* Calculate mask of events for a list of marks */ @@ -526,27 +765,27 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, - int allow_dups, __kernel_fsid_t *fsid); + fsnotify_connp_t *connp, unsigned int obj_type, + int add_flags, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups, + unsigned int obj_type, int add_flags, __kernel_fsid_t *fsid); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, - int allow_dups) + int add_flags) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); + FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, - int allow_dups) + int add_flags) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups, + FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL); } @@ -559,22 +798,23 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); -/* run all the marks in a group, and clear all of the marks attached to given object type */ -extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); +/* Clear all of the marks of a group attached to a given object type */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + unsigned 
int obj_type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); } /* run all the marks in a group, and clear all of the sn marks */ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 55b2ec1f96..d1ca5eb56f 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -660,6 +660,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); extern void pm_restrict_gfp_mask(void); extern void pm_restore_gfp_mask(void); +/* + * Check if the gfp flags allow compaction - GFP_NOIO is a really + * tricky context because the migration might require IO. 
+ */ +static inline bool gfp_compaction_allowed(gfp_t gfp_mask) +{ + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); +} + extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 8499fc9220..edcfddaa5f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? 
\ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { diff --git a/include/linux/iova.h b/include/linux/iova.h index 6b6cc104e3..9aa0acf982 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -137,6 +137,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) int iova_cache_get(void); void iova_cache_put(void); +unsigned long iova_rcache_range(void); + void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 747f40e0c3..37738ec87d 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -133,7 +133,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index db47aae7c4..8e21bd13c3 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -95,6 +95,7 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); +void kthread_exit(long result) __noreturn; int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/linkage.h 
b/include/linux/linkage.h index dbf8506dec..fc81e51330 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -69,8 +69,8 @@ #endif #ifndef __ALIGN -#define __ALIGN .align 4,0x90 -#define __ALIGN_STR ".align 4,0x90" +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT +#define __ALIGN_STR __stringify(__ALIGN) #endif #ifdef __ASSEMBLY__ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c4ae6506b8..70ce419e27 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -292,6 +292,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, struct nlm_lock *); void nlm_release_file(struct nlm_file *); +void nlmsvc_put_lockowner(struct nlm_lockowner *); void nlmsvc_release_lockowner(struct nlm_lock *); void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); @@ -303,10 +304,15 @@ void nlmsvc_invalidate_all(void); int nlmsvc_unlock_all_by_sb(struct super_block *sb); int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); +static inline struct file *nlmsvc_file_file(struct nlm_file *file) +{ + return file->f_file[O_RDONLY] ? + file->f_file[O_RDONLY] : file->f_file[O_WRONLY]; +} + static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return locks_inode(file->f_file[O_RDONLY] ? 
- file->f_file[O_RDONLY] : file->f_file[O_WRONLY]); + return locks_inode(nlmsvc_file_file(file)); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index bed63156b0..67e4a2c550 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -98,18 +98,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); -int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); -int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_res(struct svc_rqst *, __be32 *); -int nlmsvc_decode_res(struct svc_rqst *, __be32 *); -int nlmsvc_encode_void(struct svc_rqst *, __be32 *); -int nlmsvc_decode_void(struct svc_rqst *, __be32 *); -int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); -int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); -int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); +bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream 
*xdr); +bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); #endif /* LOCKD_XDR_H */ diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 025250ade9..72831e35dc 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,22 +22,21 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) - - void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); -int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); -int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); -int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_res(struct svc_rqst *, __be32 *); -int nlm4svc_decode_res(struct svc_rqst *, __be32 *); -int nlm4svc_encode_void(struct svc_rqst *, __be32 *); -int nlm4svc_decode_void(struct svc_rqst *, __be32 *); -int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); -int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); -int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); +bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool 
nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); extern const struct rpc_version nlm_version4; diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 5433c08fcc..1aea34b8f1 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -51,6 +51,23 @@ */ #define max(x, y) __careful_cmp(x, y, >) +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, <) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp((x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull, >) + /** * min3 - return minimum of three values * @x: first value diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 4ae2f2908f..d4a1567c94 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -20,6 +20,7 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int debounce); int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, unsigned int idx, unsigned int debounce); +int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config); void mmc_gpio_set_cd_isr(struct mmc_host *host, irqreturn_t (*isr)(int irq, void *dev_id)); int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); diff --git a/include/linux/module.h b/include/linux/module.h index 701c150485..fb9762e16f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -595,9 +595,9 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form 
module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); -extern void __noreturn __module_put_and_exit(struct module *mod, +extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); -#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) +#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); @@ -790,7 +790,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb) return 0; } -#define module_put_and_exit(code) do_exit(code) +#define module_put_and_kthread_exit(code) kthread_exit(code) static inline void print_modules(void) { diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 0dc7ad38a0..b06375e88e 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc memcpy(target->data, source->data, source->size); } - -/* - * This is really a general kernel constant, but since nothing like - * this is defined in the kernel headers, I have to do it here. 
- */ -#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1)) - - enum nfs3_stable_how { NFS_UNSTABLE = 0, NFS_DATA_SYNC = 1, diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 15004c4698..8e2b532919 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -292,6 +292,10 @@ enum nfsstat4 { NFS4ERR_XATTR2BIG = 10096, }; +/* error codes for internal client use */ +#define NFS4ERR_RESET_TO_MDS 12001 +#define NFS4ERR_RESET_TO_PNFS 12002 + static inline bool seqid_mutating_err(u32 err) { /* See RFC 7530, section 9.1.7 */ @@ -726,4 +730,17 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; + +enum { + RCA4_TYPE_MASK_RDATA_DLG = 0, + RCA4_TYPE_MASK_WDATA_DLG = 1, + RCA4_TYPE_MASK_DIR_DLG = 2, + RCA4_TYPE_MASK_FILE_LAYOUT = 3, + RCA4_TYPE_MASK_BLK_LAYOUT = 4, + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, +}; + #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5ddc30405f..886bfa99a6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -588,6 +588,7 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline int diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index 222ae8883e..22265b1ff0 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -53,6 +53,7 @@ static inline void nfs42_ssc_close(struct file *filep) if (nfs_ssc_client_tbl.ssc_nfs4_ops) (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep); } +#endif struct nfsd4_ssc_umount_item { struct list_head nsui_list; @@ -64,9 +65,8 @@ struct nfsd4_ssc_umount_item { refcount_t nsui_refcnt; unsigned long 
nsui_expire; struct vfsmount *nsui_vfsmount; - char nsui_ipaddr[RPC_MAX_ADDRBUFLEN]; + char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1]; }; -#endif /* * NFS_FS diff --git a/include/linux/pci.h b/include/linux/pci.h index 08d26b58f9..25e2e7756b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -455,6 +455,7 @@ struct pci_dev { unsigned int link_active_reporting:1;/* Device capable of reporting link active */ unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ + unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 3a35e74cdc..b30343a1f4 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -24,6 +24,7 @@ int tegra_xusb_padctl_set_vbus_override(struct tegra_xusb_padctl *padctl, int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); +int tegra_xusb_padctl_get_port_number(struct phy *phy); int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy, enum usb_device_speed speed); int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy); diff --git a/include/linux/printk.h b/include/linux/printk.h index 9497f6b983..c4fb848221 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -129,7 +129,7 @@ struct va_format { #define no_printk(fmt, ...) 
\ ({ \ if (0) \ - printk(fmt, ##__VA_ARGS__); \ + _printk(fmt, ##__VA_ARGS__); \ 0; \ }) diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index d373f1bcbf..5d52d15fae 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -58,7 +58,7 @@ DECLARE_PER_CPU(u32, kstack_offset); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - offset ^= (rand); \ + offset = ror32(offset, 5) ^ (rand); \ raw_cpu_write(kstack_offset, offset); \ } \ } while (0) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3c7d295746..3e7bfc0f65 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -98,6 +98,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +typedef bool (*ring_buffer_cond_fn)(void *data); int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full); diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 988528b5da..48ffe32518 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -14,10 +14,10 @@ static inline bool page_is_secretmem(struct page *page) * Using page_mapping() is quite slow because of the actual call * instruction and repeated compound_head(page) inside the * page_mapping() function. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. 
*/ - if (PageCompound(page) || !PageLRU(page)) + if (PageCompound(page)) return false; mapping = (struct address_space *) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d5b6b1550d..4d3ce67e85 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -260,6 +260,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and 
re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ed1d4472c..15de91c65a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2735,6 +2735,21 @@ static inline void skb_mac_header_rebuild(struct sk_buff *skb) } } +/* Move the full mac header up to current network_header. + * Leaves skb->data pointing at offset skb->mac_len into the mac_header. + * Must be provided the complete mac header length. 
+ */ +static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -full_mac_len); + memmove(skb_mac_header(skb), old_mac, full_mac_len); + __skb_push(skb, full_mac_len - skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 4273505d30..422b391d93 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -73,7 +73,6 @@ struct sk_psock_link { }; struct sk_psock_work_state { - struct sk_buff *skb; u32 len; u32 off; }; @@ -107,7 +106,7 @@ struct sk_psock { struct proto *sk_proto; struct mutex work_mutex; struct sk_psock_work_state work_state; - struct work_struct work; + struct delayed_work work; struct rcu_work rwork; }; @@ -462,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { + read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); + read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 71ec22b1df..9c5197c360 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -130,6 +130,7 @@ struct rpc_create_args { const char *servername; const char *nodename; const struct rpc_program *program; + struct rpc_stat *stats; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ba047a145e..f5fe330e4f 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -199,7 +199,7 @@ struct rpc_wait_queue { unsigned char 
maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 664a54e330..6e48c1c88f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -52,25 +52,6 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; -struct svc_serv; - -struct svc_serv_ops { - /* Callback to use when last thread exits. */ - void (*svo_shutdown)(struct svc_serv *, struct net *); - - /* function for service threads to run */ - int (*svo_function)(void *); - - /* queue up a transport for servicing */ - void (*svo_enqueue_xprt)(struct svc_xprt *); - - /* set up thread (or whatever) execution context */ - int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); - - /* optional module to count when adding threads (pooled svcs only) */ - struct module *svo_module; -}; - /* * RPC service. 
* @@ -85,6 +66,7 @@ struct svc_serv { struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + struct kref sv_refcnt; unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -102,7 +84,8 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - const struct svc_serv_ops *sv_ops; /* server operations */ + int (*sv_threadfn)(void *data); + #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -114,15 +97,30 @@ struct svc_serv { #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/* - * We use sv_nrthreads as a reference count. svc_destroy() drops - * this refcount, so we need to bump it up around operations that - * change the number of threads. Horrible, but there it is. - * Should be called with the "service mutex" held. +/** + * svc_get() - increment reference count on a SUNRPC serv + * @serv: the svc_serv to have count incremented + * + * Returns: the svc_serv that was passed in. + */ +static inline struct svc_serv *svc_get(struct svc_serv *serv) +{ + kref_get(&serv->sv_refcnt); + return serv; +} + +void svc_destroy(struct kref *); + +/** + * svc_put - decrement reference count on a SUNRPC serv + * @serv: the svc_serv to have count decremented + * + * When the reference count reaches zero, svc_destroy() + * is called to clean up and free the serv. 
*/ -static inline void svc_get(struct svc_serv *serv) +static inline void svc_put(struct svc_serv *serv) { - serv->sv_nrthreads++; + kref_put(&serv->sv_refcnt, svc_destroy); } /* @@ -456,41 +454,21 @@ struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ - int (*pc_decode)(struct svc_rqst *, __be32 *data); + bool (*pc_decode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR encode result: */ - int (*pc_encode)(struct svc_rqst *, __be32 *data); + bool (*pc_encode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ + unsigned int pc_argzero; /* how much of argument to clear */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ const char * pc_name; /* for display */ }; -/* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; - -struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -}; - -extern struct svc_pool_map svc_pool_map; - /* * Function prototypes. 
*/ @@ -498,24 +476,17 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); -struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, - struct svc_pool *pool, int node); void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -unsigned int svc_pool_map_get(void); -void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_destroy(struct svc_serv *); -void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 154eee6bc6..1f7368f5b4 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -127,15 +127,16 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, struct net *, - const int, const unsigned short, int, - const struct cred *); +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags, + const struct cred 
*cred); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); void svc_xprt_received(struct svc_xprt *xprt); -void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -void svc_close_xprt(struct svc_xprt *xprt); +void svc_xprt_close(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index bcc555c7ae..13aff355d5 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -59,10 +59,9 @@ int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); -bool svc_alien_sock(struct net *net, int fd); -int svc_addsock(struct svc_serv *serv, const int fd, - char *name_return, const size_t len, - const struct cred *cred); +int svc_addsock(struct svc_serv *serv, struct net *net, + const int fd, char *name_return, const size_t len, + const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 3a2c714d6b..98e197376a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); +extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int 
xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); diff --git a/include/linux/timer.h b/include/linux/timer.h index fda13c9d12..e78521bce5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -183,12 +183,20 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync(struct timer_list *timer); -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - extern int del_timer_sync(struct timer_list *timer); -#else -# define del_timer_sync(t) del_timer(t) -#endif +/** + * del_timer_sync - Delete a pending timer and wait for a running callback + * @timer: The timer to be deleted + * + * See timer_delete_sync() for detailed explanation. + * + * Do not use in new code. Use timer_delete_sync() instead. + */ +static inline int del_timer_sync(struct timer_list *timer) +{ + return timer_delete_sync(timer); +} #define del_singleshot_timer_sync(t) del_timer_sync(t) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d3cbe4bf4f..17575aa2a5 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -676,7 +676,7 @@ struct trace_event_file { } \ early_initcall(trace_init_perf_perm_##name); -#define PERF_MAX_TRACE_SIZE 2048 +#define PERF_MAX_TRACE_SIZE 8192 #define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index e81856c0ba..6a0f2097d3 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -66,7 +66,7 @@ #include struct u64_stats_sync { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) seqcount_t seq; #endif }; @@ -115,7 +115,7 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#if 
BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) #else static inline void u64_stats_init(struct u64_stats_sync *syncp) @@ -125,15 +125,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp) static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); write_seqcount_begin(&syncp->seq); #endif } static inline void u64_stats_update_end(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); #endif } @@ -142,8 +146,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) { unsigned long flags = 0; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - local_irq_save(flags); +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_irq_save(flags); write_seqcount_begin(&syncp->seq); #endif return flags; @@ -153,15 +160,18 @@ static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, unsigned long flags) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); - local_irq_restore(flags); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_irq_restore(flags); #endif } static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_begin(&syncp->seq); #else return 0; @@ -170,7 +180,7 
@@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync * static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -179,7 +189,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_retry(&syncp->seq, start); #else return false; @@ -189,7 +199,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_enable(); #endif return __u64_stats_fetch_retry(syncp, start); @@ -203,7 +213,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_disable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -212,7 +224,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_enable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_enable(); #endif 
return __u64_stats_fetch_retry(syncp, start); diff --git a/include/linux/udp.h b/include/linux/udp.h index 0727276e75..fdf5afb393 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -132,6 +132,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -143,6 +161,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. 
+ */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index b53a955788..b7275ed44e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -243,6 +243,7 @@ struct virqfd { wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; + struct work_struct flush_inject; struct virqfd **pvirqfd; }; @@ -251,5 +252,6 @@ extern int vfio_virqfd_enable(void *opaque, void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd); extern void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); #endif /* VFIO_H */ diff --git a/include/media/cec.h b/include/media/cec.h index 77346f7570..38eb9334d8 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -120,14 +120,16 @@ struct cec_adap_ops { int (*adap_log_addr)(struct cec_adapter *adap, u8 logical_addr); int (*adap_transmit)(struct cec_adapter *adap, u8 attempts, u32 signal_free_time, struct cec_msg *msg); + void (*adap_nb_transmit_canceled)(struct cec_adapter *adap, + const struct cec_msg *msg); void (*adap_status)(struct cec_adapter *adap, struct seq_file *file); void (*adap_free)(struct cec_adapter *adap); - /* Error injection callbacks */ + /* Error injection callbacks, called without adap->lock held */ int (*error_inj_show)(struct cec_adapter *adap, struct seq_file *sf); bool (*error_inj_parse_line)(struct cec_adapter *adap, char *line); - /* High-level CEC message callback */ + /* High-level CEC message callback, called without adap->lock held */ int (*received)(struct cec_adapter *adap, struct cec_msg *msg); }; @@ -163,6 +165,11 @@ struct cec_adap_ops { * @wait_queue: queue of transmits waiting for a reply * @transmitting: CEC messages currently being transmitted * @transmit_in_progress: true if a transmit is in progress + * 
@transmit_in_progress_aborted: true if a transmit is in progress is to be + * aborted. This happens if the logical address is + * invalidated while the transmit is ongoing. In that + * case the transmit will finish, but will not retransmit + * and be marked as ABORTED. * @kthread_config: kthread used to configure a CEC adapter * @config_completion: used to signal completion of the config kthread * @kthread: main CEC processing thread @@ -175,6 +182,7 @@ struct cec_adap_ops { * @needs_hpd: if true, then the HDMI HotPlug Detect pin must be high * in order to transmit or receive CEC messages. This is usually a HW * limitation. + * @is_enabled: the CEC adapter is enabled * @is_configuring: the CEC adapter is configuring (i.e. claiming LAs) * @is_configured: the CEC adapter is configured (i.e. has claimed LAs) * @cec_pin_is_high: if true then the CEC pin is high. Only used with the @@ -217,6 +225,7 @@ struct cec_adapter { struct list_head wait_queue; struct cec_data *transmitting; bool transmit_in_progress; + bool transmit_in_progress_aborted; struct task_struct *kthread_config; struct completion config_completion; @@ -231,6 +240,8 @@ struct cec_adapter { u16 phys_addr; bool needs_hpd; + bool is_enabled; + bool is_claiming_log_addrs; bool is_configuring; bool is_configured; bool cec_pin_is_high; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 700a19e045..5cf1a73774 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -435,6 +435,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) refcount_inc(&ifp->refcnt); } +static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) +{ + return refcount_inc_not_zero(&ifp->refcnt); +} /* * compute link-local solicited-node multicast address diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 32d21983c6..b2c79f18f6 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -56,7 +56,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct 
list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 @@ -80,6 +80,9 @@ enum unix_socket_lock_class { U_LOCK_NORMAL, U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ + U_LOCK_GC_LISTENER, /* used for listening socket while determining gc + * candidates to close a small race window. + */ }; static inline void unix_state_lock_nested(struct sock *sk, diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ed07844eb..5290781abb 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -257,6 +257,7 @@ struct ieee802154_llsec_key { struct ieee802154_llsec_key_entry { struct list_head list; + struct rcu_head rcu; struct ieee802154_llsec_key_id id; struct ieee802154_llsec_key *key; diff --git a/include/net/dsa.h b/include/net/dsa.h index bec439c4a0..e57d6e65f2 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -705,6 +705,14 @@ struct dsa_switch_ops { struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); + /* + * Compatibility between device trees defining multiple CPU ports and + * drivers which are not OK to use by default the numerically smallest + * CPU port of a switch for its local ports. This can return NULL, + * meaning "don't know/don't care". 
+ */ + struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds); + /* * Port's MAC EEE settings */ diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 632086b2f6..3ae2fda295 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -24,7 +24,7 @@ struct dst_ops { void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, struct net_device *dev, int how); - struct dst_entry * (*negative_advice)(struct dst_entry *); + void (*negative_advice)(struct sock *sk, struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 56f1286583..f89320b6fe 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -48,6 +48,22 @@ struct sock *__inet6_lookup_established(struct net *net, const u16 hnum, const int dif, const int sdif); +typedef u32 (inet6_ehashfn_t)(const struct net *net, + const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const __be16 fport); + +inet6_ehashfn_t inet6_ehashfn; + +INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn); + +struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, + inet6_ehashfn_t *ehashfn); + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 798aad2169..b6b7e210f9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -169,6 +169,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); 
+void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index a7a8e66a1b..5849f81640 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -279,6 +279,20 @@ struct sock *__inet_lookup_established(struct net *net, const __be32 daddr, const u16 hnum, const int dif, const int sdif); +typedef u32 (inet_ehashfn_t)(const struct net *net, + const __be32 laddr, const __u16 lport, + const __be32 faddr, const __be16 fport); + +inet_ehashfn_t inet_ehashfn; + +INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn); + +struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, + inet_ehashfn_t *ehashfn); + static inline struct sock * inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, @@ -348,10 +362,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, refcounted); } -u32 inet6_ehashfn(const struct net *net, - const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const __be16 fport); - static inline void sk_daddr_set(struct sock *sk, __be32 addr) { sk->sk_daddr = addr; /* alias of inet_daddr */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 17ec652e8f..eca36edb85 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -332,6 +332,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) return pskb_network_may_pull(skb, nhlen); } +/* Variant of pskb_inet_may_pull(). + */ +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +{ + int nhlen = 0, maclen = ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) + * And we get MAC len. 
+ */ + if (eth_type_vlan(type)) + type = __vlan_get_protocol(skb, type, &maclen); + + switch (type) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + } + /* For ETH_P_IPV6/ETH_P_IP we make sure to pull + * a base network header in skb->head. + */ + if (!pskb_may_pull(skb, maclen + nhlen)) + return false; + + skb_set_network_header(skb, maclen); + return true; +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2ba326f9e0..c47baa623b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -26,6 +26,9 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) +#include +#endif #include #include #include @@ -138,6 +141,9 @@ struct net { #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) struct netns_nftables nft; #endif +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) + struct netns_ft ft; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index dabd84fa3f..8e98fb8edf 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -318,7 +318,7 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); -static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) { __be16 proto; @@ -334,4 +334,35 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) return 0; } +static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) +{ + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + 
return false; + + *inner_proto = __nf_flow_pppoe_proto(skb); + + return true; +} + +#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ + this_cpu_inc((net)->ft.stat->count) +#define NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count) \ + this_cpu_dec((net)->ft.stat->count) + +#ifdef CONFIG_NF_FLOW_TABLE_PROCFS +int nf_flow_table_init_proc(struct net *net); +void nf_flow_table_fini_proc(struct net *net); +#else +static inline int nf_flow_table_init_proc(struct net *net) +{ + return 0; +} + +static inline void nf_flow_table_fini_proc(struct net *net) +{ +} +#endif /* CONFIG_NF_FLOW_TABLE_PROCFS */ + #endif /* _NF_FLOW_TABLE_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 9dfa11d422..315869fc3f 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -74,16 +74,6 @@ struct nft_payload { u8 dreg; }; -struct nft_payload_set { - enum nft_payload_bases base:8; - u8 offset; - u8 len; - u8 sreg; - u8 csum_type; - u8 csum_offset; - u8 csum_flags; -}; - extern const struct nft_expr_ops nft_payload_fast_ops; extern const struct nft_expr_ops nft_bitwise_fast_ops; diff --git a/include/net/netns/flow_table.h b/include/net/netns/flow_table.h new file mode 100644 index 0000000000..1c5fc657e2 --- /dev/null +++ b/include/net/netns/flow_table.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NETNS_FLOW_TABLE_H +#define __NETNS_FLOW_TABLE_H + +struct nf_flow_table_stat { + unsigned int count_wq_add; + unsigned int count_wq_del; + unsigned int count_wq_stats; +}; + +struct netns_ft { + struct nf_flow_table_stat __percpu *stat; +}; +#endif diff --git a/include/net/sock.h b/include/net/sock.h index e19eebaf59..b8de579b91 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1751,6 +1751,13 @@ static inline void 
sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); @@ -2075,17 +2082,10 @@ sk_dst_get(struct sock *sk) static inline void __dst_negative_advice(struct sock *sk) { - struct dst_entry *ndst, *dst = __sk_dst_get(sk); + struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst->ops->negative_advice) { - ndst = dst->ops->negative_advice(dst); - - if (ndst != dst) { - rcu_assign_pointer(sk->sk_dst_cache, ndst); - sk_tx_queue_clear(sk); - WRITE_ONCE(sk->sk_dst_pending_confirm, 0); - } - } + if (dst && dst->ops->negative_advice) + dst->ops->negative_advice(sk, dst); } static inline void dst_negative_advice(struct sock *sk) diff --git a/include/net/tls.h b/include/net/tls.h index ea0aeae26c..59ff5c901a 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -128,9 +128,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -148,9 +145,6 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 async_capable:1; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; - bool async_notify; }; struct tls_record_info { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6156ed2950..2e2e30d31a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1019,6 +1019,9 @@ struct xfrm_offload { #define CRYPTO_INVALID_PACKET_SYNTAX 64 #define CRYPTO_INVALID_PROTOCOL 128 + /* Used to keep whole l2 header for transport mode GRO */ + __u32 orig_mac_len; + __u8 proto; __u8 inner_ipproto; }; diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h index 59eeba31c1..0d3d6beb7f 100644 
--- a/include/soc/fsl/qman.h +++ b/include/soc/fsl/qman.h @@ -1171,6 +1171,15 @@ int qman_delete_cgr(struct qman_cgr *cgr); */ void qman_delete_cgr_safe(struct qman_cgr *cgr); +/** + * qman_update_cgr_safe - Modifies a congestion group object from any CPU + * @cgr: the 'cgr' object to modify + * @opts: state of the CGR settings + * + * This will select the proper CPU and modify the CGR settings. + */ +int qman_update_cgr_safe(struct qman_cgr *cgr, struct qm_mcc_initcgr *opts); + /** * qman_query_cgr_congested - Queries CGR's congestion status * @cgr: the 'cgr' object to query diff --git a/include/soc/qcom/cmd-db.h b/include/soc/qcom/cmd-db.h index c8bb56e685..47a6cab75e 100644 --- a/include/soc/qcom/cmd-db.h +++ b/include/soc/qcom/cmd-db.h @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. */ +/* + * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ #ifndef __QCOM_COMMAND_DB_H__ #define __QCOM_COMMAND_DB_H__ @@ -21,6 +24,8 @@ u32 cmd_db_read_addr(const char *resource_id); const void *cmd_db_read_aux_data(const char *resource_id, size_t *len); +bool cmd_db_match_resource_addr(u32 addr1, u32 addr2); + enum cmd_db_hw_type cmd_db_read_slave_id(const char *resource_id); int cmd_db_ready(void); @@ -31,6 +36,9 @@ static inline u32 cmd_db_read_addr(const char *resource_id) static inline const void *cmd_db_read_aux_data(const char *resource_id, size_t *len) { return ERR_PTR(-ENODEV); } +static inline bool cmd_db_match_resource_addr(u32 addr1, u32 addr2) +{ return false; } + static inline enum cmd_db_hw_type cmd_db_read_slave_id(const char *resource_id) { return -ENODEV; } diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h index 4d8ef71090..97a434d021 100644 --- a/include/trace/events/asoc.h +++ b/include/trace/events/asoc.h @@ -12,6 +12,8 @@ #define DAPM_DIRECT "(direct)" #define DAPM_ARROW(dir) (((dir) == SND_SOC_DAPM_DIR_OUT) ? "->" : "<-") +TRACE_DEFINE_ENUM(SND_SOC_DAPM_DIR_OUT); + struct snd_soc_jack; struct snd_soc_card; struct snd_soc_dapm_widget; diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index b2a2672e66..6959255ccf 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -13,6 +13,8 @@ #include +#include + /** ** GSS-API related trace events **/ @@ -99,7 +101,7 @@ DECLARE_EVENT_CLASS(rpcgss_gssapi_event, __entry->maj_stat = maj_stat; ), - TP_printk("task:%u@%u maj_stat=%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " maj_stat=%s", __entry->task_id, __entry->client_id, __entry->maj_stat == 0 ? 
"GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat)) @@ -332,7 +334,8 @@ TRACE_EVENT(rpcgss_unwrap_failed, __entry->client_id = task->tk_client->cl_clid; ), - TP_printk("task:%u@%u", __entry->task_id, __entry->client_id) + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, + __entry->task_id, __entry->client_id) ); TRACE_EVENT(rpcgss_bad_seqno, @@ -358,7 +361,8 @@ TRACE_EVENT(rpcgss_bad_seqno, __entry->received = received; ), - TP_printk("task:%u@%u expected seqno %u, received seqno %u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " expected seqno %u, received seqno %u", __entry->task_id, __entry->client_id, __entry->expected, __entry->received) ); @@ -386,7 +390,7 @@ TRACE_EVENT(rpcgss_seqno, __entry->seqno = rqst->rq_seqno; ), - TP_printk("task:%u@%u xid=0x%08x seqno=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x seqno=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno) ); @@ -418,7 +422,8 @@ TRACE_EVENT(rpcgss_need_reencode, __entry->ret = ret; ), - TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno, __entry->seq_xmit, __entry->ret ? 
"" : "un") @@ -452,7 +457,8 @@ TRACE_EVENT(rpcgss_update_slack, __entry->verfsize = auth->au_verfsize; ), - TP_printk("task:%u@%u xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", __entry->task_id, __entry->client_id, __entry->xid, __entry->auth, __entry->rslack, __entry->ralign, __entry->verfsize) @@ -581,7 +587,7 @@ TRACE_EVENT(rpcgss_context, __field(unsigned int, timeout) __field(u32, window_size) __field(int, len) - __string(acceptor, data) + __string_len(acceptor, data, len) ), TP_fast_assign( @@ -590,7 +596,7 @@ TRACE_EVENT(rpcgss_context, __entry->timeout = timeout; __entry->window_size = window_size; __entry->len = len; - strncpy(__get_str(acceptor), data, len); + __assign_str(acceptor, data); ), TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s", diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index de41954995..513c09774e 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -14,7 +14,9 @@ #include #include #include -#include + +#include +#include /** ** Event classes @@ -279,7 +281,8 @@ DECLARE_EVENT_CLASS(xprtrdma_rdch_event, __entry->nsegs = nsegs; ), - TP_printk("task:%u@%u pos=%u %u@0x%016llx:0x%08x (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " pos=%u %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->pos, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -326,7 +329,8 @@ DECLARE_EVENT_CLASS(xprtrdma_wrch_event, __entry->nsegs = nsegs; ), - TP_printk("task:%u@%u %u@0x%016llx:0x%08x (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -387,7 +391,8 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, __entry->dir = mr->mr_dir; ), - TP_printk("task:%u@%u mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)", + 
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)", __entry->task_id, __entry->client_id, __entry->mr_id, __entry->nents, __entry->length, (unsigned long long)__entry->offset, __entry->handle, @@ -630,9 +635,9 @@ TRACE_EVENT(xprtrdma_nomrs_err, __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s task:%u@%u", - __get_str(addr), __get_str(port), - __entry->task_id, __entry->client_id + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " peer=[%s]:%s", + __entry->task_id, __entry->client_id, + __get_str(addr), __get_str(port) ) ); @@ -693,7 +698,8 @@ TRACE_EVENT(xprtrdma_marshal, __entry->wtype = wtype; ), - TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x hdr=%u xdr=%u/%u/%u %s/%s", __entry->task_id, __entry->client_id, __entry->xid, __entry->hdrlen, __entry->headlen, __entry->pagelen, __entry->taillen, @@ -723,7 +729,7 @@ TRACE_EVENT(xprtrdma_marshal_failed, __entry->ret = ret; ), - TP_printk("task:%u@%u xid=0x%08x: ret=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->ret ) @@ -750,7 +756,7 @@ TRACE_EVENT(xprtrdma_prepsend_failed, __entry->ret = ret; ), - TP_printk("task:%u@%u xid=0x%08x: ret=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->ret ) @@ -785,7 +791,7 @@ TRACE_EVENT(xprtrdma_post_send, __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; ), - TP_printk("task:%u@%u cq.id=%u cid=%d (%d SGE%s) %s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u cid=%d (%d SGE%s) %s", __entry->task_id, __entry->client_id, __entry->cq_id, __entry->completion_id, __entry->num_sge, (__entry->num_sge == 1 ? 
"" : "s"), @@ -820,7 +826,7 @@ TRACE_EVENT(xprtrdma_post_send_err, __entry->rc = rc; ), - TP_printk("task:%u@%u cq.id=%u rc=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u rc=%d", __entry->task_id, __entry->client_id, __entry->cq_id, __entry->rc ) @@ -932,7 +938,7 @@ TRACE_EVENT(xprtrdma_post_linv_err, __entry->status = status; ), - TP_printk("task:%u@%u status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d", __entry->task_id, __entry->client_id, __entry->status ) ); @@ -1120,7 +1126,7 @@ TRACE_EVENT(xprtrdma_reply, __entry->credits = credits; ), - TP_printk("task:%u@%u xid=0x%08x credits=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x credits=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->credits ) @@ -1156,7 +1162,7 @@ TRACE_EVENT(xprtrdma_err_vers, __entry->max = be32_to_cpup(max); ), - TP_printk("task:%u@%u xid=0x%08x versions=[%u, %u]", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x versions=[%u, %u]", __entry->task_id, __entry->client_id, __entry->xid, __entry->min, __entry->max ) @@ -1181,7 +1187,7 @@ TRACE_EVENT(xprtrdma_err_chunk, __entry->xid = be32_to_cpu(rqst->rq_xid); ), - TP_printk("task:%u@%u xid=0x%08x", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x", __entry->task_id, __entry->client_id, __entry->xid ) ); @@ -1207,7 +1213,7 @@ TRACE_EVENT(xprtrdma_err_unrecognized, __entry->procedure = be32_to_cpup(procedure); ), - TP_printk("task:%u@%u xid=0x%08x procedure=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x procedure=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->procedure ) @@ -1239,7 +1245,7 @@ TRACE_EVENT(xprtrdma_fixup, __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; ), - TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " fixup=%lu xdr=%zu/%u/%zu", __entry->task_id, __entry->client_id, __entry->fixup, __entry->headlen, __entry->pagelen, __entry->taillen ) @@ -1289,7 +1295,7 @@ TRACE_EVENT(xprtrdma_mrs_zap, 
__entry->client_id = task->tk_client->cl_clid; ), - TP_printk("task:%u@%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, __entry->task_id, __entry->client_id ) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f09bbb6c91..e8eb83315f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,6 +14,8 @@ #include #include +#include + TRACE_DEFINE_ENUM(SOCK_STREAM); TRACE_DEFINE_ENUM(SOCK_DGRAM); TRACE_DEFINE_ENUM(SOCK_RAW); @@ -78,7 +80,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, __entry->msg_len = xdr->len; ), - TP_printk("task:%u@%u head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", __entry->task_id, __entry->client_id, __entry->head_base, __entry->head_len, __entry->page_len, __entry->tail_base, __entry->tail_len, __entry->msg_len @@ -114,7 +117,7 @@ DECLARE_EVENT_CLASS(rpc_clnt_class, __entry->client_id = clnt->cl_clid; ), - TP_printk("clid=%u", __entry->client_id) + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER, __entry->client_id) ); #define DEFINE_RPC_CLNT_EVENT(name) \ @@ -158,7 +161,8 @@ TRACE_EVENT(rpc_clnt_new, __assign_str(server, server); ), - TP_printk("client=%u peer=[%s]:%s program=%s server=%s", + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER + " peer=[%s]:%s program=%s server=%s", __entry->client_id, __get_str(addr), __get_str(port), __get_str(program), __get_str(server)) ); @@ -206,7 +210,8 @@ TRACE_EVENT(rpc_clnt_clone_err, __entry->error = error; ), - TP_printk("client=%u error=%d", __entry->client_id, __entry->error) + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER " error=%d", + __entry->client_id, __entry->error) ); @@ -248,7 +253,7 @@ DECLARE_EVENT_CLASS(rpc_task_status, __entry->status = task->tk_status; ), - TP_printk("task:%u@%u status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d", __entry->task_id, __entry->client_id, __entry->status) ); @@ -288,7 +293,7 @@ TRACE_EVENT(rpc_request, 
__assign_str(procname, rpc_proc_name(task)); ), - TP_printk("task:%u@%u %sv%d %s (%ssync)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " %sv%d %s (%ssync)", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procname), __entry->async ? "a": "" @@ -348,7 +353,8 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%ps", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " flags=%s runstate=%s status=%d action=%ps", __entry->task_id, __entry->client_id, rpc_show_task_flags(__entry->flags), rpc_show_runstate(__entry->runstate), @@ -400,7 +406,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued, __assign_str(q_name, rpc_qname(q)); ), - TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " flags=%s runstate=%s status=%d timeout=%lu queue=%s", __entry->task_id, __entry->client_id, rpc_show_task_flags(__entry->flags), rpc_show_runstate(__entry->runstate), @@ -436,7 +443,7 @@ DECLARE_EVENT_CLASS(rpc_failure, __entry->client_id = task->tk_client->cl_clid; ), - TP_printk("task:%u@%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER, __entry->task_id, __entry->client_id) ); @@ -478,7 +485,8 @@ DECLARE_EVENT_CLASS(rpc_reply_event, __assign_str(servername, task->tk_xprt->servername); ), - TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " server=%s xid=0x%08x %sv%d %s", __entry->task_id, __entry->client_id, __get_str(servername), __entry->xid, __get_str(progname), __entry->version, __get_str(procname)) @@ -538,7 +546,8 @@ TRACE_EVENT(rpc_buf_alloc, __entry->status = status; ), - TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " callsize=%zu recvsize=%zu status=%d", __entry->task_id, __entry->client_id, __entry->callsize, __entry->recvsize, __entry->status ) @@ -567,7 +576,8 @@ TRACE_EVENT(rpc_call_rpcerror, 
__entry->rpc_status = rpc_status; ), - TP_printk("task:%u@%u tk_status=%d rpc_status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " tk_status=%d rpc_status=%d", __entry->task_id, __entry->client_id, __entry->tk_status, __entry->rpc_status) ); @@ -607,7 +617,8 @@ TRACE_EVENT(rpc_stats_latency, __entry->execute = ktime_to_us(execute); ), - TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), __entry->backlog, __entry->rtt, __entry->execute) @@ -651,8 +662,8 @@ TRACE_EVENT(rpc_xdr_overflow, __entry->version = task->tk_client->cl_vers; __assign_str(procedure, task->tk_msg.rpc_proc->p_name); } else { - __entry->task_id = 0; - __entry->client_id = 0; + __entry->task_id = -1; + __entry->client_id = -1; __assign_str(progname, "unknown"); __entry->version = 0; __assign_str(procedure, "unknown"); @@ -668,8 +679,8 @@ TRACE_EVENT(rpc_xdr_overflow, __entry->len = xdr->buf->len; ), - TP_printk( - "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->requested, __entry->p, __entry->end, @@ -727,8 +738,8 @@ TRACE_EVENT(rpc_xdr_alignment, __entry->len = xdr->buf->len; ), - TP_printk( - "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n", __entry->task_id, __entry->client_id, __get_str(progname), __entry->version, __get_str(procedure), __entry->offset, __entry->copied, @@ -917,7 +928,8 @@ TRACE_EVENT(rpc_socket_nospace, __entry->remaining = rqst->rq_slen - transport->xmit.offset; ), - 
TP_printk("task:%u@%u total=%u remaining=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " total=%u remaining=%u", __entry->task_id, __entry->client_id, __entry->total, __entry->remaining ) @@ -1042,8 +1054,8 @@ TRACE_EVENT(xprt_transmit, __entry->status = status; ), - TP_printk( - "task:%u@%u xid=0x%08x seqno=%u status=%d", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x seqno=%u status=%d", __entry->task_id, __entry->client_id, __entry->xid, __entry->seqno, __entry->status) ); @@ -1082,8 +1094,8 @@ TRACE_EVENT(xprt_retransmit, __assign_str(procname, rpc_proc_name(task)); ), - TP_printk( - "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d timeout=%lu", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x %sv%d %s ntrans=%d timeout=%lu", __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), __entry->ntrans, __entry->timeout @@ -1140,7 +1152,8 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, __entry->snd_task_id = -1; ), - TP_printk("task:%u@%u snd_task:%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " snd_task:" SUNRPC_TRACE_PID_SPECIFIER, __entry->task_id, __entry->client_id, __entry->snd_task_id) ); @@ -1192,7 +1205,9 @@ DECLARE_EVENT_CLASS(xprt_cong_event, __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); ), - TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " snd_task:" SUNRPC_TRACE_PID_SPECIFIER + " cong=%lu cwnd=%lu%s", __entry->task_id, __entry->client_id, __entry->snd_task_id, __entry->cong, __entry->cwnd, __entry->wait ? 
" (wait)" : "") @@ -1230,7 +1245,7 @@ TRACE_EVENT(xprt_reserve, __entry->xid = be32_to_cpu(rqst->rq_xid); ), - TP_printk("task:%u@%u xid=0x%08x", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x", __entry->task_id, __entry->client_id, __entry->xid ) ); @@ -1319,7 +1334,8 @@ TRACE_EVENT(rpcb_getport, __assign_str(servername, task->tk_xprt->servername); ), - TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " server=%s program=%u version=%u protocol=%d bind_version=%u", __entry->task_id, __entry->client_id, __get_str(servername), __entry->program, __entry->version, __entry->protocol, __entry->bind_version @@ -1349,7 +1365,7 @@ TRACE_EVENT(rpcb_setport, __entry->port = port; ), - TP_printk("task:%u@%u status=%d port=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d port=%u", __entry->task_id, __entry->client_id, __entry->status, __entry->port ) diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h new file mode 100644 index 0000000000..738b97f22f --- /dev/null +++ b/include/trace/misc/fs.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Display helpers for generic filesystem items + * + * Author: Chuck Lever + * + * Copyright (c) 2020, Oracle and/or its affiliates. 
+ */ + +#include + +#define show_fs_dirent_type(x) \ + __print_symbolic(x, \ + { DT_UNKNOWN, "UNKNOWN" }, \ + { DT_FIFO, "FIFO" }, \ + { DT_CHR, "CHR" }, \ + { DT_DIR, "DIR" }, \ + { DT_BLK, "BLK" }, \ + { DT_REG, "REG" }, \ + { DT_LNK, "LNK" }, \ + { DT_SOCK, "SOCK" }, \ + { DT_WHT, "WHT" }) + +#define show_fs_fcntl_open_flags(x) \ + __print_flags(x, "|", \ + { O_WRONLY, "O_WRONLY" }, \ + { O_RDWR, "O_RDWR" }, \ + { O_CREAT, "O_CREAT" }, \ + { O_EXCL, "O_EXCL" }, \ + { O_NOCTTY, "O_NOCTTY" }, \ + { O_TRUNC, "O_TRUNC" }, \ + { O_APPEND, "O_APPEND" }, \ + { O_NONBLOCK, "O_NONBLOCK" }, \ + { O_DSYNC, "O_DSYNC" }, \ + { O_DIRECT, "O_DIRECT" }, \ + { O_LARGEFILE, "O_LARGEFILE" }, \ + { O_DIRECTORY, "O_DIRECTORY" }, \ + { O_NOFOLLOW, "O_NOFOLLOW" }, \ + { O_NOATIME, "O_NOATIME" }, \ + { O_CLOEXEC, "O_CLOEXEC" }) + +#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x } +#define show_fs_fmode_flags(x) \ + __print_flags(x, "|", \ + __fmode_flag(READ), \ + __fmode_flag(WRITE), \ + __fmode_flag(EXEC)) + +#ifdef CONFIG_64BIT +#define show_fs_fcntl_cmd(x) \ + __print_symbolic(x, \ + { F_DUPFD, "DUPFD" }, \ + { F_GETFD, "GETFD" }, \ + { F_SETFD, "SETFD" }, \ + { F_GETFL, "GETFL" }, \ + { F_SETFL, "SETFL" }, \ + { F_GETLK, "GETLK" }, \ + { F_SETLK, "SETLK" }, \ + { F_SETLKW, "SETLKW" }, \ + { F_SETOWN, "SETOWN" }, \ + { F_GETOWN, "GETOWN" }, \ + { F_SETSIG, "SETSIG" }, \ + { F_GETSIG, "GETSIG" }, \ + { F_SETOWN_EX, "SETOWN_EX" }, \ + { F_GETOWN_EX, "GETOWN_EX" }, \ + { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ + { F_OFD_GETLK, "OFD_GETLK" }, \ + { F_OFD_SETLK, "OFD_SETLK" }, \ + { F_OFD_SETLKW, "OFD_SETLKW" }) +#else /* CONFIG_64BIT */ +#define show_fs_fcntl_cmd(x) \ + __print_symbolic(x, \ + { F_DUPFD, "DUPFD" }, \ + { F_GETFD, "GETFD" }, \ + { F_SETFD, "SETFD" }, \ + { F_GETFL, "GETFL" }, \ + { F_SETFL, "SETFL" }, \ + { F_GETLK, "GETLK" }, \ + { F_SETLK, "SETLK" }, \ + { F_SETLKW, "SETLKW" }, \ + { F_SETOWN, "SETOWN" }, \ + { F_GETOWN, "GETOWN" }, \ + { F_SETSIG, 
"SETSIG" }, \ + { F_GETSIG, "GETSIG" }, \ + { F_GETLK64, "GETLK64" }, \ + { F_SETLK64, "SETLK64" }, \ + { F_SETLKW64, "SETLKW64" }, \ + { F_SETOWN_EX, "SETOWN_EX" }, \ + { F_GETOWN_EX, "GETOWN_EX" }, \ + { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \ + { F_OFD_GETLK, "OFD_GETLK" }, \ + { F_OFD_SETLK, "OFD_SETLK" }, \ + { F_OFD_SETLKW, "OFD_SETLKW" }) +#endif /* CONFIG_64BIT */ + +#define show_fs_fcntl_lock_type(x) \ + __print_symbolic(x, \ + { F_RDLCK, "RDLCK" }, \ + { F_WRLCK, "WRLCK" }, \ + { F_UNLCK, "UNLCK" }) + +#define show_fs_lookup_flags(flags) \ + __print_flags(flags, "|", \ + { LOOKUP_FOLLOW, "FOLLOW" }, \ + { LOOKUP_DIRECTORY, "DIRECTORY" }, \ + { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + { LOOKUP_EMPTY, "EMPTY" }, \ + { LOOKUP_DOWN, "DOWN" }, \ + { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \ + { LOOKUP_REVAL, "REVAL" }, \ + { LOOKUP_RCU, "RCU" }, \ + { LOOKUP_OPEN, "OPEN" }, \ + { LOOKUP_CREATE, "CREATE" }, \ + { LOOKUP_EXCL, "EXCL" }, \ + { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ + { LOOKUP_PARENT, "PARENT" }, \ + { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \ + { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \ + { LOOKUP_NO_XDEV, "NO_XDEV" }, \ + { LOOKUP_BENEATH, "BENEATH" }, \ + { LOOKUP_IN_ROOT, "IN_ROOT" }, \ + { LOOKUP_CACHED, "CACHED" }) diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h new file mode 100644 index 0000000000..0d9d48dca3 --- /dev/null +++ b/include/trace/misc/nfs.h @@ -0,0 +1,387 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Display helpers for NFS protocol elements + * + * Author: Chuck Lever + * + * Copyright (c) 2020, Oracle and/or its affiliates. 
+ */ + +#include +#include +#include + +TRACE_DEFINE_ENUM(NFS_OK); +TRACE_DEFINE_ENUM(NFSERR_PERM); +TRACE_DEFINE_ENUM(NFSERR_NOENT); +TRACE_DEFINE_ENUM(NFSERR_IO); +TRACE_DEFINE_ENUM(NFSERR_NXIO); +TRACE_DEFINE_ENUM(NFSERR_EAGAIN); +TRACE_DEFINE_ENUM(NFSERR_ACCES); +TRACE_DEFINE_ENUM(NFSERR_EXIST); +TRACE_DEFINE_ENUM(NFSERR_XDEV); +TRACE_DEFINE_ENUM(NFSERR_NODEV); +TRACE_DEFINE_ENUM(NFSERR_NOTDIR); +TRACE_DEFINE_ENUM(NFSERR_ISDIR); +TRACE_DEFINE_ENUM(NFSERR_INVAL); +TRACE_DEFINE_ENUM(NFSERR_FBIG); +TRACE_DEFINE_ENUM(NFSERR_NOSPC); +TRACE_DEFINE_ENUM(NFSERR_ROFS); +TRACE_DEFINE_ENUM(NFSERR_MLINK); +TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); +TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); +TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); +TRACE_DEFINE_ENUM(NFSERR_DQUOT); +TRACE_DEFINE_ENUM(NFSERR_STALE); +TRACE_DEFINE_ENUM(NFSERR_REMOTE); +TRACE_DEFINE_ENUM(NFSERR_WFLUSH); +TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); +TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); +TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); +TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); +TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); +TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); +TRACE_DEFINE_ENUM(NFSERR_BADTYPE); +TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); + +#define show_nfs_status(x) \ + __print_symbolic(x, \ + { NFS_OK, "OK" }, \ + { NFSERR_PERM, "PERM" }, \ + { NFSERR_NOENT, "NOENT" }, \ + { NFSERR_IO, "IO" }, \ + { NFSERR_NXIO, "NXIO" }, \ + { ECHILD, "CHILD" }, \ + { NFSERR_EAGAIN, "AGAIN" }, \ + { NFSERR_ACCES, "ACCES" }, \ + { NFSERR_EXIST, "EXIST" }, \ + { NFSERR_XDEV, "XDEV" }, \ + { NFSERR_NODEV, "NODEV" }, \ + { NFSERR_NOTDIR, "NOTDIR" }, \ + { NFSERR_ISDIR, "ISDIR" }, \ + { NFSERR_INVAL, "INVAL" }, \ + { NFSERR_FBIG, "FBIG" }, \ + { NFSERR_NOSPC, "NOSPC" }, \ + { NFSERR_ROFS, "ROFS" }, \ + { NFSERR_MLINK, "MLINK" }, \ + { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ + { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ + { NFSERR_DQUOT, "DQUOT" }, \ + { NFSERR_STALE, "STALE" }, \ + { NFSERR_REMOTE, "REMOTE" }, \ + { NFSERR_WFLUSH, "WFLUSH" }, \ + { 
NFSERR_BADHANDLE, "BADHANDLE" }, \ + { NFSERR_NOT_SYNC, "NOTSYNC" }, \ + { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ + { NFSERR_NOTSUPP, "NOTSUPP" }, \ + { NFSERR_TOOSMALL, "TOOSMALL" }, \ + { NFSERR_SERVERFAULT, "REMOTEIO" }, \ + { NFSERR_BADTYPE, "BADTYPE" }, \ + { NFSERR_JUKEBOX, "JUKEBOX" }) + +TRACE_DEFINE_ENUM(NFS_UNSTABLE); +TRACE_DEFINE_ENUM(NFS_DATA_SYNC); +TRACE_DEFINE_ENUM(NFS_FILE_SYNC); + +#define show_nfs_stable_how(x) \ + __print_symbolic(x, \ + { NFS_UNSTABLE, "UNSTABLE" }, \ + { NFS_DATA_SYNC, "DATA_SYNC" }, \ + { NFS_FILE_SYNC, "FILE_SYNC" }) + +TRACE_DEFINE_ENUM(NFS4_OK); +TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); +TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); +TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); +TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); +TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); +TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); +TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); +TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); +TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); +TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); +TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); +TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); +TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); +TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); +TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); +TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); +TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); +TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); +TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); +TRACE_DEFINE_ENUM(NFS4ERR_DELAY); +TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); +TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); +TRACE_DEFINE_ENUM(NFS4ERR_DENIED); +TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); +TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); 
+TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_EXIST); +TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); +TRACE_DEFINE_ENUM(NFS4ERR_FBIG); +TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); +TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); +TRACE_DEFINE_ENUM(NFS4ERR_GRACE); +TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_INVAL); +TRACE_DEFINE_ENUM(NFS4ERR_IO); +TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); +TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); +TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); +TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); +TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); +TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); +TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); +TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); +TRACE_DEFINE_ENUM(NFS4ERR_MLINK); +TRACE_DEFINE_ENUM(NFS4ERR_MOVED); +TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); +TRACE_DEFINE_ENUM(NFS4ERR_NOENT); +TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); +TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); +TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); +TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); +TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); +TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); +TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); +TRACE_DEFINE_ENUM(NFS4ERR_NXIO); +TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); +TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); +TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); +TRACE_DEFINE_ENUM(NFS4ERR_PERM); +TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); +TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); +TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); +TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); +TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); +TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); +TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); +TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); +TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); +TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); +TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); +TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); 
+TRACE_DEFINE_ENUM(NFS4ERR_ROFS); +TRACE_DEFINE_ENUM(NFS4ERR_SAME); +TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); +TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); +TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); +TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); +TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); +TRACE_DEFINE_ENUM(NFS4ERR_STALE); +TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); +TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); +TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); +TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); +TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); +TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); +TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); +TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); +TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); +TRACE_DEFINE_ENUM(NFS4ERR_XDEV); + +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); + +#define show_nfs4_status(x) \ + __print_symbolic(x, \ + { NFS4_OK, "OK" }, \ + { EPERM, "EPERM" }, \ + { ENOENT, "ENOENT" }, \ + { EIO, "EIO" }, \ + { ENXIO, "ENXIO" }, \ + { EACCES, "EACCES" }, \ + { EEXIST, "EEXIST" }, \ + { EXDEV, "EXDEV" }, \ + { ENOTDIR, "ENOTDIR" }, \ + { EISDIR, "EISDIR" }, \ + { EFBIG, "EFBIG" }, \ + { ENOSPC, "ENOSPC" }, \ + { EROFS, "EROFS" }, \ + { EMLINK, "EMLINK" }, \ + { ENAMETOOLONG, "ENAMETOOLONG" }, \ + { ENOTEMPTY, "ENOTEMPTY" }, \ + { EDQUOT, "EDQUOT" }, \ + { ESTALE, "ESTALE" }, \ + { EBADHANDLE, "EBADHANDLE" }, \ + { EBADCOOKIE, "EBADCOOKIE" }, \ + { ENOTSUPP, "ENOTSUPP" }, \ + { ETOOSMALL, "ETOOSMALL" }, \ + { EREMOTEIO, "EREMOTEIO" }, \ + { EBADTYPE, "EBADTYPE" }, \ + { EAGAIN, "EAGAIN" }, \ + { ELOOP, "ELOOP" }, \ + { EOPNOTSUPP, "EOPNOTSUPP" }, \ + { EDEADLK, "EDEADLK" }, \ + { ENOMEM, "ENOMEM" }, \ + { EKEYEXPIRED, "EKEYEXPIRED" }, \ + { ETIMEDOUT, "ETIMEDOUT" }, \ + { ERESTARTSYS, "ERESTARTSYS" }, \ + { ECONNREFUSED, "ECONNREFUSED" }, \ + { ECONNRESET, "ECONNRESET" }, \ + { ENETUNREACH, "ENETUNREACH" }, \ + { EHOSTUNREACH, "EHOSTUNREACH" }, \ + { EHOSTDOWN, "EHOSTDOWN" }, \ + { 
EPIPE, "EPIPE" }, \ + { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ + { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ + { NFS4ERR_ACCESS, "ACCESS" }, \ + { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ + { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ + { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ + { NFS4ERR_BADCHAR, "BADCHAR" }, \ + { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ + { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ + { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ + { NFS4ERR_BADLABEL, "BADLABEL" }, \ + { NFS4ERR_BADNAME, "BADNAME" }, \ + { NFS4ERR_BADOWNER, "BADOWNER" }, \ + { NFS4ERR_BADSESSION, "BADSESSION" }, \ + { NFS4ERR_BADSLOT, "BADSLOT" }, \ + { NFS4ERR_BADTYPE, "BADTYPE" }, \ + { NFS4ERR_BADXDR, "BADXDR" }, \ + { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ + { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ + { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ + { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ + { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ + { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ + { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ + { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ + { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ + { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \ + { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ + { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ + { NFS4ERR_DELAY, "DELAY" }, \ + { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \ + { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ + { NFS4ERR_DENIED, "DENIED" }, \ + { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ + { NFS4ERR_DQUOT, "DQUOT" }, \ + { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ + { NFS4ERR_EXIST, "EXIST" }, \ + { NFS4ERR_EXPIRED, "EXPIRED" }, \ + { NFS4ERR_FBIG, "FBIG" }, \ + { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ + { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ + { NFS4ERR_GRACE, "GRACE" }, \ + { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ + { NFS4ERR_INVAL, "INVAL" }, \ + { NFS4ERR_IO, "IO" }, \ + { NFS4ERR_ISDIR, "ISDIR" }, \ + { 
NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ + { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ + { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ + { NFS4ERR_LOCKED, "LOCKED" }, \ + { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ + { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ + { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ + { NFS4ERR_MLINK, "MLINK" }, \ + { NFS4ERR_MOVED, "MOVED" }, \ + { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { NFS4ERR_NOENT, "NOENT" }, \ + { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ + { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ + { NFS4ERR_NOSPC, "NOSPC" }, \ + { NFS4ERR_NOTDIR, "NOTDIR" }, \ + { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ + { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ + { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ + { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ + { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ + { NFS4ERR_NXIO, "NXIO" }, \ + { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ + { NFS4ERR_OPENMODE, "OPENMODE" }, \ + { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ + { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ + { NFS4ERR_PERM, "PERM" }, \ + { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ + { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ + { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ + { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ + { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ + { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ + { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ + { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \ + { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ + { NFS4ERR_RESOURCE, "RESOURCE" }, \ + { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ + { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ + { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ + { NFS4ERR_ROFS, "ROFS" }, \ + { NFS4ERR_SAME, "SAME" }, \ + { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ + { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ + { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ + { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ + { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ + { 
NFS4ERR_STALE, "STALE" }, \ + { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ + { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ + { NFS4ERR_SYMLINK, "SYMLINK" }, \ + { NFS4ERR_TOOSMALL, "TOOSMALL" }, \ + { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ + { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ + { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ + { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ + { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ + { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ + { NFS4ERR_XDEV, "XDEV" }, \ + /* ***** Internal to Linux NFS client ***** */ \ + { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ + { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) + +#define show_nfs4_verifier(x) \ + __print_hex_str(x, NFS4_VERIFIER_SIZE) + +TRACE_DEFINE_ENUM(IOMODE_READ); +TRACE_DEFINE_ENUM(IOMODE_RW); +TRACE_DEFINE_ENUM(IOMODE_ANY); + +#define show_pnfs_layout_iomode(x) \ + __print_symbolic(x, \ + { IOMODE_READ, "READ" }, \ + { IOMODE_RW, "RW" }, \ + { IOMODE_ANY, "ANY" }) + +#define show_rca_mask(x) \ + __print_flags(x, "|", \ + { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \ + { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" }) + +#define show_nfs4_seq4_status(x) \ + __print_flags(x, "|", \ + { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \ + { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \ + { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \ + { SEQ4_STATUS_ADMIN_STATE_REVOKED, 
"ADMIN_STATE_REVOKED" }, \ + { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \ + { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ + { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \ + { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \ + { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" }) diff --git a/include/trace/events/rdma.h b/include/trace/misc/rdma.h similarity index 100% rename from include/trace/events/rdma.h rename to include/trace/misc/rdma.h diff --git a/include/trace/misc/sunrpc.h b/include/trace/misc/sunrpc.h new file mode 100644 index 0000000000..588557d07e --- /dev/null +++ b/include/trace/misc/sunrpc.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021 Oracle and/or its affiliates. + * + * Common types and format specifiers for sunrpc. + */ + +#if !defined(_TRACE_SUNRPC_BASE_H) +#define _TRACE_SUNRPC_BASE_H + +#include + +#define SUNRPC_TRACE_PID_SPECIFIER "%08x" +#define SUNRPC_TRACE_CLID_SPECIFIER "%08x" +#define SUNRPC_TRACE_TASK_SPECIFIER \ + "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER + +#endif /* _TRACE_SUNRPC_BASE_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bdb5f2ba76..6bfb510656 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6089,7 +6089,7 @@ struct bpf_fib_lookup { /* output: MTU value */ __u16 mtu_result; - }; + } __attribute__((packed, aligned(2))); /* input: L3 device index for lookup * output: device index from FIB lookup */ diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 64553df9d7..d8536d77fe 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -20,6 +20,7 @@ #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define 
FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ @@ -27,6 +28,8 @@ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ +#define FAN_RENAME 0x10000000 /* File was renamed */ + #define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ @@ -56,9 +59,13 @@ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ @@ -75,12 +82,21 @@ #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 /* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 /* These are NOT bitwise flags. Both bits can be used togther. */ #define FAN_MARK_INODE 0x00000000 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_FILESYSTEM 0x00000100 +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + /* Deprecated - do not use this in programs and do not add new flags here! 
*/ #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ @@ -125,6 +141,13 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_DFID_NAME 2 #define FAN_EVENT_INFO_TYPE_DFID 3 #define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -159,6 +182,12 @@ struct fanotify_event_info_pidfd { __s32 pidfd; }; +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + struct fanotify_response { __s32 fd; __u32 response; diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 7989d9483e..bed20a89c1 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -602,6 +602,7 @@ #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ +#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h deleted file mode 100644 index e29e8accc4..0000000000 --- a/include/uapi/linux/nfsd/nfsfh.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * This file describes the layout of the file handles as passed - * over the wire. - * - * Copyright (C) 1995, 1996, 1997 Olaf Kirch - */ - -#ifndef _UAPI_LINUX_NFSD_FH_H -#define _UAPI_LINUX_NFSD_FH_H - -#include -#include -#include -#include -#include - -/* - * This is the old "dentry style" Linux NFSv2 file handle. 
- * - * The xino and xdev fields are currently used to transport the - * ino/dev of the exported inode. - */ -struct nfs_fhbase_old { - __u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */ - __u32 fb_ino; /* our inode number */ - __u32 fb_dirino; /* dir inode number, 0 for directories */ - __u32 fb_dev; /* our device */ - __u32 fb_xdev; - __u32 fb_xino; - __u32 fb_generation; -}; - -/* - * This is the new flexible, extensible style NFSv2/v3/v4 file handle. - * - * The file handle starts with a sequence of four-byte words. - * The first word contains a version number (1) and three descriptor bytes - * that tell how the remaining 3 variable length fields should be handled. - * These three bytes are auth_type, fsid_type and fileid_type. - * - * All four-byte values are in host-byte-order. - * - * The auth_type field is deprecated and must be set to 0. - * - * The fsid_type identifies how the filesystem (or export point) is - * encoded. - * Current values: - * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number - * NOTE: we cannot use the kdev_t device id value, because kdev_t.h - * says we mustn't. We must break it up and reassemble. - * 1 - 4 byte user specified identifier - * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED - * 3 - 4 byte device id, encoded for user-space, 4 byte inode number - * 4 - 4 byte inode number and 4 byte uuid - * 5 - 8 byte uuid - * 6 - 16 byte uuid - * 7 - 8 byte inode number and 16 byte uuid - * - * The fileid_type identified how the file within the filesystem is encoded. - * The values for this field are filesystem specific, exccept that - * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' - * in include/linux/exportfs.h for currently registered values. 
- */ -struct nfs_fhbase_new { - union { - struct { - __u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type_aux; - __u8 fb_fsid_type_aux; - __u8 fb_fileid_type_aux; - __u32 fb_auth[1]; - /* __u32 fb_fsid[0]; floating */ - /* __u32 fb_fileid[0]; floating */ - }; - struct { - __u8 fb_version; /* == 1, even => nfs_fhbase_old */ - __u8 fb_auth_type; - __u8 fb_fsid_type; - __u8 fb_fileid_type; - __u32 fb_auth_flex[]; /* flexible-array member */ - }; - }; -}; - -struct knfsd_fh { - unsigned int fh_size; /* significant for NFSv3. - * Points to the current size while building - * a new file handle - */ - union { - struct nfs_fhbase_old fh_old; - __u32 fh_pad[NFS4_FHSIZE/4]; - struct nfs_fhbase_new fh_new; - } fh_base; -}; - -#define ofh_dcookie fh_base.fh_old.fb_dcookie -#define ofh_ino fh_base.fh_old.fb_ino -#define ofh_dirino fh_base.fh_old.fb_dirino -#define ofh_dev fh_base.fh_old.fb_dev -#define ofh_xdev fh_base.fh_old.fb_xdev -#define ofh_xino fh_base.fh_old.fb_xino -#define ofh_generation fh_base.fh_old.fb_generation - -#define fh_version fh_base.fh_new.fb_version -#define fh_fsid_type fh_base.fh_new.fb_fsid_type -#define fh_auth_type fh_base.fh_new.fb_auth_type -#define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_fsid fh_base.fh_new.fb_auth_flex - -/* Do not use, provided for userspace compatiblity. 
*/ -#define fh_auth fh_base.fh_new.fb_auth - -#endif /* _UAPI_LINUX_NFSD_FH_H */ diff --git a/init/initramfs.c b/init/initramfs.c index a842c05447..f153fb5057 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -655,7 +655,7 @@ static void __init populate_initrd_image(char *err) printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700); if (IS_ERR(file)) return; diff --git a/init/main.c b/init/main.c index f27e8510b1..3f3dc2a8bd 100644 --- a/init/main.c +++ b/init/main.c @@ -640,6 +640,8 @@ static void __init setup_command_line(char *command_line) if (!saved_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); + len = xlen + strlen(command_line) + 1; + static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!static_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index a51429c034..ea005700c8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -6621,6 +6621,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { switch (req->opcode) { case IORING_OP_NOP: + if (READ_ONCE(sqe->rw_flags)) + return -EINVAL; return 0; case IORING_OP_READV: case IORING_OP_READ_FIXED: @@ -8422,7 +8424,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, } io_rsrc_node_switch(ctx, NULL); - return ret; + return 0; out_fput: for (i = 0; i < ctx->nr_user_files; i++) { file = io_file_from_index(ctx, i); diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index c428312938..c565fbf66a 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa audit_update_mark(audit_mark, dentry->d_inode); audit_mark->rule = krule; - ret = 
fsnotify_add_inode_mark(&audit_mark->mark, inode, true); + ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0); if (ret < 0) { audit_mark->path = NULL; fsnotify_put_mark(&audit_mark->mark); @@ -161,8 +161,7 @@ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask, audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark); - if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group) || - WARN_ON_ONCE(!inode)) + if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group)) return 0; if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { @@ -183,7 +182,8 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = { static int __init audit_fsnotify_init(void) { - audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops); + audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops, + FSNOTIFY_GROUP_DUPS); if (IS_ERR(audit_fsnotify_group)) { audit_fsnotify_group = NULL; audit_panic("cannot create audit fsnotify group"); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2cd7b56944..18ab4575ae 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1073,7 +1073,7 @@ static int __init audit_tree_init(void) audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC); - audit_tree_group = fsnotify_alloc_group(&audit_tree_ops); + audit_tree_group = fsnotify_alloc_group(&audit_tree_ops, 0); if (IS_ERR(audit_tree_group)) audit_panic("cannot initialize fsnotify group for rectree watches"); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index edbeffee64..5cf22fe301 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -472,8 +472,7 @@ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask, parent = container_of(inode_mark, struct audit_parent, mark); - if (WARN_ON_ONCE(inode_mark->group != audit_watch_group) || - WARN_ON_ONCE(!inode)) + if (WARN_ON_ONCE(inode_mark->group != audit_watch_group)) return 0; if (mask & (FS_CREATE|FS_MOVED_TO) 
&& inode) @@ -493,7 +492,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = { static int __init audit_watch_init(void) { - audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops); + audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops, 0); if (IS_ERR(audit_watch_group)) { audit_watch_group = NULL; audit_panic("cannot create audit fsnotify group"); diff --git a/kernel/bounds.c b/kernel/bounds.c index 9795d75b09..a3e1d3dfad 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, order_base_2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); /* End of constants */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5d4bea53ac..a0c7e13e0a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5447,6 +5447,46 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { #endif }; +/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ +static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int rec) +{ + const struct btf_type *member_type; + const struct btf_member *member; + u32 i; + + if (!btf_type_is_struct(t)) + return false; + + for_each_member(i, t, member) { + const struct btf_array *array; + + member_type = btf_type_skip_modifiers(btf, member->type, NULL); + if (btf_type_is_struct(member_type)) { + if (rec >= 3) { + bpf_log(log, "max struct nesting depth exceeded\n"); + return false; + } + if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1)) + return false; + continue; + } + if (btf_type_is_array(member_type)) { + array = btf_type_array(member_type); + if (!array->nelems) + return false; + member_type = btf_type_skip_modifiers(btf, array->type, NULL); + if (!btf_type_is_scalar(member_type)) + return false; + continue; 
+ } + if (!btf_type_is_scalar(member_type)) + return false; + } + return true; +} + static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, @@ -5455,6 +5495,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, enum bpf_prog_type prog_type = env->prog->type == BPF_PROG_TYPE_EXT ? env->prog->aux->dst_prog->type : env->prog->type; struct bpf_verifier_log *log = &env->log; + bool is_kfunc = btf_is_kernel(btf); const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; @@ -5507,7 +5548,20 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - if (btf_is_kernel(btf)) { + if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + /* If function expects ctx type in BTF check that caller + * is passing PTR_TO_CTX. + */ + if (reg->type != PTR_TO_CTX) { + bpf_log(log, + "arg#%d expected pointer to ctx, but got %s\n", + i, btf_type_str(t)); + return -EINVAL; + } + if (check_ptr_off_reg(env, reg, regno)) + return -EINVAL; + } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || + (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) { const struct btf_type *reg_ref_t; const struct btf *reg_btf; const char *reg_ref_tname; @@ -5523,14 +5577,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (reg->type == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; - } else if (reg2btf_ids[base_type(reg->type)]) { + } else { reg_btf = btf_vmlinux; reg_ref_id = *reg2btf_ids[base_type(reg->type)]; - } else { - bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n", - func_name, i, - btf_type_str(ref_t), ref_tname, regno); - return -EINVAL; } reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, @@ -5546,22 +5595,24 @@ static int 
btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_tname); return -EINVAL; } - } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { - /* If function expects ctx type in BTF check that caller - * is passing PTR_TO_CTX. - */ - if (reg->type != PTR_TO_CTX) { - bpf_log(log, - "arg#%d expected pointer to ctx, but got %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - if (check_ctx_reg(env, reg, regno)) - return -EINVAL; } else if (ptr_to_mem_ok) { const struct btf_type *resolve_ret; u32 type_size; + if (is_kfunc) { + /* Permit pointer to mem, but only when argument + * type is pointer to scalar, or struct composed + * (recursively) of scalars. + */ + if (!btf_type_is_scalar(ref_t) && + !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) { + bpf_log(log, + "arg#%d pointer type %s %s must point to scalar or struct with scalar\n", + i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + } + resolve_ret = btf_resolve_size(btf, ref_t, &type_size); if (IS_ERR(resolve_ret)) { bpf_log(log, @@ -5574,6 +5625,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (check_mem_reg(env, reg, regno, type_size)) return -EINVAL; } else { + bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i, + is_kfunc ? 
"kernel " : "", func_name, func_id); return -EINVAL; } } @@ -5623,7 +5676,7 @@ int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs) { - return btf_check_func_arg_match(env, btf, func_id, regs, false); + return btf_check_func_arg_match(env, btf, func_id, regs, true); } /* Convert BTF of a function into bpf_reg_state if possible diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index a8429cfb4a..0848d5691f 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -764,15 +764,9 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) list_add(&bq->flush_node, flush_list); } -int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx) { - struct xdp_frame *xdpf; - - xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - /* Info needed when constructing SKB on remote CPU */ xdpf->dev_rx = dev_rx; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index b591073c5f..bbf3ec03aa 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -468,24 +468,19 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, bq->q[bq->count++] = xdpf; } -static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct xdp_frame *xdpf; int err; if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; - err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); + err = xdp_ok_fwd_dev(dev, xdpf->len); if (unlikely(err)) return err; - xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - bq_enqueue(dev, xdpf, dev_rx, xdp_prog); return 0; } @@ -521,27 +516,27 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev 
return act; } -int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx) { - return __xdp_enqueue(dev, xdp, dev_rx, NULL); + return __xdp_enqueue(dev, xdpf, dev_rx, NULL); } -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx) { struct net_device *dev = dst->dev; - return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); + return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog); } -static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp) +static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) { if (!obj || !obj->dev->netdev_ops->ndo_xdp_xmit) return false; - if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data)) + if (xdp_ok_fwd_dev(obj->dev, xdpf->len)) return false; return true; @@ -587,14 +582,13 @@ static int get_upper_ifindexes(struct net_device *dev, int *indexes) return n; } -int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, +int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dst, *last_dst = NULL; int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; - struct xdp_frame *xdpf; int num_excluded = 0; unsigned int i; int err; @@ -604,15 +598,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, excluded_devices[num_excluded++] = dev_rx->ifindex; } - xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!is_valid_dst(dst, xdp)) + if (!is_valid_dst(dst, xdpf)) continue; if 
(is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) @@ -635,7 +625,7 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, head = dev_map_index_hash(dtab, i); hlist_for_each_entry_rcu(dst, head, index_hlist, lockdep_is_held(&dtab->index_lock)) { - if (!is_valid_dst(dst, xdp)) + if (!is_valid_dst(dst, xdpf)) continue; if (is_ifindex_excluded(excluded_devices, num_excluded, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f099c5481b..07ca1157f9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -573,6 +573,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env, if (type & MEM_RDONLY) strncpy(prefix, "rdonly_", 16); + if (type & MEM_ALLOC) + strncpy(prefix, "alloc_", 16); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); @@ -3980,16 +3982,17 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, } #endif -int check_ctx_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno) +static int __check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + bool fixed_off_ok) { - /* Access to ctx or passing it to a helper is only allowed in - * its original, unmodified form. + /* Access to this pointer-typed register or passing it to a helper + * is only allowed in its original, unmodified form. 
*/ - if (reg->off) { - verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", - regno, reg->off); + if (!fixed_off_ok && reg->off) { + verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", + reg_type_str(env, reg->type), regno, reg->off); return -EACCES; } @@ -3997,13 +4000,20 @@ int check_ctx_reg(struct bpf_verifier_env *env, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); + verbose(env, "variable %s access var_off=%s disallowed\n", + reg_type_str(env, reg->type), tn_buf); return -EACCES; } return 0; } +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) +{ + return __check_ptr_off_reg(env, reg, regno, false); +} + static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg, @@ -4320,6 +4330,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); other checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow?
*/ if (err) { if (tnum_is_const(reg->var_off)) { @@ -4442,7 +4457,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return -EACCES; } - err = check_ctx_reg(env, reg, regno); + err = check_ptr_off_reg(env, reg, regno); if (err < 0) return err; @@ -5144,6 +5159,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, + PTR_TO_MEM | MEM_ALLOC, PTR_TO_BUF, }, }; @@ -5161,7 +5177,7 @@ static const struct bpf_reg_types int_ptr_types = { static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; +static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -5261,12 +5277,6 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, kernel_type_name(btf_vmlinux, *arg_btf_id)); return -EACCES; } - - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", - regno); - return -EACCES; - } } return 0; @@ -5321,10 +5331,33 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, if (err) return err; - if (type == PTR_TO_CTX) { - err = check_ctx_reg(env, reg, regno); + switch ((u32)type) { + case SCALAR_VALUE: + /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_PACKET: + case PTR_TO_PACKET_META: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + case PTR_TO_MEM: + case PTR_TO_MEM | MEM_RDONLY: + case PTR_TO_MEM | MEM_ALLOC: + case 
PTR_TO_BUF: + case PTR_TO_BUF | MEM_RDONLY: + case PTR_TO_STACK: + /* Some of the argument types nevertheless require a + * zero register offset. + */ + if (arg_type == ARG_PTR_TO_ALLOC_MEM) + goto force_off_check; + break; + /* All the rest must be rejected: */ + default: +force_off_check: + err = __check_ptr_off_reg(env, reg, regno, + type == PTR_TO_BTF_ID); if (err < 0) return err; + break; } skip_type_check: @@ -5535,7 +5568,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) enum bpf_attach_type eatype = env->prog->expected_attach_type; enum bpf_prog_type type = resolve_prog_type(env->prog); - if (func_id != BPF_FUNC_map_update_elem) + if (func_id != BPF_FUNC_map_update_elem && + func_id != BPF_FUNC_map_delete_elem) return false; /* It's not possible to get access to a locked struct sock in these @@ -5546,6 +5580,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) if (eatype == BPF_TRACE_ITER) return true; break; + case BPF_PROG_TYPE_SOCK_OPS: + /* map_update allowed only via dedicated helpers with event type checks */ + if (func_id == BPF_FUNC_map_delete_elem) + return true; + break; case BPF_PROG_TYPE_SOCKET_FILTER: case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: @@ -5633,7 +5672,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SOCKMAP: if (func_id != BPF_FUNC_sk_redirect_map && func_id != BPF_FUNC_sock_map_update && - func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_map && func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem && @@ -5643,7 +5681,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SOCKHASH: if (func_id != BPF_FUNC_sk_redirect_hash && func_id != BPF_FUNC_sock_hash_update && - func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_hash && func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem && @@ -9556,7 
+9593,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } - err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg); + err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg); if (err < 0) return err; @@ -11842,8 +11879,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", - insn[0].imm); + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); return PTR_ERR(map); } diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 6905079c15..82df5a07a8 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1938,7 +1938,7 @@ bool current_cpuset_is_being_rebound(void) static int update_relax_domain_level(struct cpuset *cs, s64 val) { #ifdef CONFIG_SMP - if (val < -1 || val >= sched_domain_level_max) + if (val < -1 || val > sched_domain_level_max + 1) return -EINVAL; #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 0e786de993..1c370f87d8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2722,7 +2722,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - CPU_MITIGATIONS_AUTO; + IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 6735ac36b7..a3b4b55d2e 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -172,6 +172,33 @@ char kdb_getchar(void) unreachable(); } +/** + * kdb_position_cursor() - Place cursor in the correct horizontal position + * @prompt: Nil-terminated string containing the prompt string + * @buffer: Nil-terminated string containing the entire command line + * @cp: Cursor position, pointer the character in buffer where the cursor + * should be positioned.
+ * + * The cursor is positioned by sending a carriage-return and then printing + * the content of the line until we reach the correct cursor position. + * + * There is some additional fine detail here. + * + * Firstly, even though kdb_printf() will correctly format zero-width fields + * we want the second call to kdb_printf() to be conditional. That keeps things + * a little cleaner when LOGGING=1. + * + * Secondly, we can't combine everything into one call to kdb_printf() since + * that renders into a fixed length buffer and the combined print could result + * in unwanted truncation. + */ +static void kdb_position_cursor(char *prompt, char *buffer, char *cp) +{ + kdb_printf("\r%s", kdb_prompt_str); + if (cp > buffer) + kdb_printf("%.*s", (int)(cp - buffer), buffer); +} + /* * kdb_read * @@ -200,7 +227,6 @@ static char *kdb_read(char *buffer, size_t bufsize) * and null byte */ char *lastchar; char *p_tmp; - char tmp; static char tmpbuffer[CMD_BUFLEN]; int len = strlen(buffer); int len_tmp; @@ -237,12 +263,8 @@ static char *kdb_read(char *buffer, size_t bufsize) } *(--lastchar) = '\0'; --cp; - kdb_printf("\b%s \r", cp); - tmp = *cp; - *cp = '\0'; - kdb_printf(kdb_prompt_str); - kdb_printf("%s", buffer); - *cp = tmp; + kdb_printf("\b%s ", cp); + kdb_position_cursor(kdb_prompt_str, buffer, cp); } break; case 13: /* enter */ @@ -259,19 +281,14 @@ static char *kdb_read(char *buffer, size_t bufsize) memcpy(tmpbuffer, cp+1, lastchar - cp - 1); memcpy(cp, tmpbuffer, lastchar - cp - 1); *(--lastchar) = '\0'; - kdb_printf("%s \r", cp); - tmp = *cp; - *cp = '\0'; - kdb_printf(kdb_prompt_str); - kdb_printf("%s", buffer); - *cp = tmp; + kdb_printf("%s ", cp); + kdb_position_cursor(kdb_prompt_str, buffer, cp); } break; case 1: /* Home */ if (cp > buffer) { - kdb_printf("\r"); - kdb_printf(kdb_prompt_str); cp = buffer; + kdb_position_cursor(kdb_prompt_str, buffer, cp); } break; case 5: /* End */ @@ -287,11 +304,10 @@ static char *kdb_read(char *buffer, size_t bufsize) } break; 
case 14: /* Down */ - memset(tmpbuffer, ' ', - strlen(kdb_prompt_str) + (lastchar-buffer)); - *(tmpbuffer+strlen(kdb_prompt_str) + - (lastchar-buffer)) = '\0'; - kdb_printf("\r%s\r", tmpbuffer); + case 16: /* Up */ + kdb_printf("\r%*c\r", + (int)(strlen(kdb_prompt_str) + (lastchar - buffer)), + ' '); *lastchar = (char)key; *(lastchar+1) = '\0'; return lastchar; @@ -301,15 +317,6 @@ static char *kdb_read(char *buffer, size_t bufsize) ++cp; } break; - case 16: /* Up */ - memset(tmpbuffer, ' ', - strlen(kdb_prompt_str) + (lastchar-buffer)); - *(tmpbuffer+strlen(kdb_prompt_str) + - (lastchar-buffer)) = '\0'; - kdb_printf("\r%s\r", tmpbuffer); - *lastchar = (char)key; - *(lastchar+1) = '\0'; - return lastchar; case 9: /* Tab */ if (tab < 2) ++tab; @@ -353,15 +360,25 @@ static char *kdb_read(char *buffer, size_t bufsize) kdb_printf("\n"); kdb_printf(kdb_prompt_str); kdb_printf("%s", buffer); + if (cp != lastchar) + kdb_position_cursor(kdb_prompt_str, buffer, cp); } else if (tab != 2 && count > 0) { - len_tmp = strlen(p_tmp); - strncpy(p_tmp+len_tmp, cp, lastchar-cp+1); - len_tmp = strlen(p_tmp); - strncpy(cp, p_tmp+len, len_tmp-len + 1); - len = len_tmp - len; - kdb_printf("%s", cp); - cp += len; - lastchar += len; + /* How many new characters do we want from tmpbuffer? 
*/ + len_tmp = strlen(p_tmp) - len; + if (lastchar + len_tmp >= bufend) + len_tmp = bufend - lastchar; + + if (len_tmp) { + /* + 1 ensures the '\0' is memmove'd */ + memmove(cp+len_tmp, cp, (lastchar-cp) + 1); + memcpy(cp, p_tmp+len, len_tmp); + kdb_printf("%s", cp); + cp += len_tmp; + lastchar += len_tmp; + if (cp != lastchar) + kdb_position_cursor(kdb_prompt_str, + buffer, cp); + } } kdb_nextline = 1; /* reset output line number */ break; @@ -372,13 +389,9 @@ static char *kdb_read(char *buffer, size_t bufsize) memcpy(cp+1, tmpbuffer, lastchar - cp); *++lastchar = '\0'; *cp = key; - kdb_printf("%s\r", cp); + kdb_printf("%s", cp); ++cp; - tmp = *cp; - *cp = '\0'; - kdb_printf(kdb_prompt_str); - kdb_printf("%s", buffer); - *cp = tmp; + kdb_position_cursor(kdb_prompt_str, buffer, cp); } else { *++lastchar = '\0'; *cp++ = key; diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 9b9af1bd6b..b7f8bb7a1e 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -124,7 +124,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) struct task_struct **tsk; int threads = map->bparam.threads; int node = map->bparam.node; - const cpumask_t *cpu_mask = cpumask_of_node(node); u64 loops; int ret = 0; int i; @@ -145,7 +144,7 @@ static int do_map_benchmark(struct map_benchmark_data *map) } if (node != NUMA_NO_NODE) - kthread_bind_mask(tsk[i], cpu_mask); + kthread_bind_mask(tsk[i], cpumask_of_node(node)); } /* clear the old value in the previous benchmark */ @@ -231,7 +230,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, } if (map->bparam.node != NUMA_NO_NODE && - !node_possible(map->bparam.node)) { + (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES || + !node_possible(map->bparam.node))) { pr_err("invalid numa node\n"); return -EINVAL; } diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 9478eccd1c..c9dbc8f581 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -777,6 +777,18 
@@ size_t dma_max_mapping_size(struct device *dev) } EXPORT_SYMBOL_GPL(dma_max_mapping_size); +size_t dma_opt_mapping_size(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + size_t size = SIZE_MAX; + + if (ops && ops->opt_mapping_size) + size = ops->opt_mapping_size(); + + return min(dma_max_mapping_size(dev), size); +} +EXPORT_SYMBOL_GPL(dma_opt_mapping_size); + bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { const struct dma_map_ops *ops = get_dma_ops(dev); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index a9849670bd..5c7ed5d519 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -469,8 +469,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; unsigned int offset = swiotlb_align_offset(dev, orig_addr); @@ -478,6 +477,14 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, BUG_ON(!nslots); + /* + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when + * offsetting into the allocation. + */ + alloc_align_mask |= (IO_TLB_SIZE - 1); + iotlb_align_mask &= ~alloc_align_mask; + /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. 
For allocations of diff --git a/kernel/entry/common.c b/kernel/entry/common.c index e002bea6b4..d11d4047a0 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -73,8 +73,14 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, /* Either of the above might have changed the syscall number */ syscall = syscall_get_nr(current, regs); - if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { trace_sys_enter(regs, syscall); + /* + * Probes or BPF hooks in the tracepoint may have changed the + * system call number as well. + */ + syscall = syscall_get_nr(current, regs); + } syscall_enter_audit(regs, syscall); diff --git a/kernel/events/core.c b/kernel/events/core.c index 4e5a73c7db..e79cd0fd1d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7119,9 +7119,16 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; + unsigned long flags; u64 values[6]; int n = 0; + /* + * Disabling interrupts avoids all counter scheduling + * (context switches, timer based rotation and IPIs). + */ + local_irq_save(flags); + values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -7157,6 +7164,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); } + + local_irq_restore(flags); } #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 39a41c56ad..24fd9db84c 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -69,6 +69,14 @@ static bool migrate_one_irq(struct irq_desc *desc) return false; } + /* + * Complete an eventually pending irq move cleanup. If this + * interrupt was moved in hard irq context, then the vectors need + * to be cleaned up. 
It can't wait until this interrupt actually + * happens and this CPU was involved. + */ + irq_force_complete_move(desc); + /* * No move required, if: * - Interrupt is per cpu @@ -87,14 +95,6 @@ static bool migrate_one_irq(struct irq_desc *desc) return false; } - /* - * Complete an eventually pending irq move cleanup. If this - * interrupt was moved in hard irq context, then the vectors need - * to be cleaned up. It can't wait until this interrupt actually - * happens and this CPU was involved. - */ - irq_force_complete_move(desc); - /* * If there is a setaffinity pending, then try to reuse the pending * mask, so the last change of the affinity does not get lost. If diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5d713a7d7e..af57705e1f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1564,10 +1564,17 @@ static int check_kprobe_address_safe(struct kprobe *p, jump_label_lock(); preempt_disable(); - /* Ensure it is not in reserved area nor out of text */ - if (!(core_kernel_text((unsigned long) p->addr) || - is_module_text_address((unsigned long) p->addr)) || - in_gate_area_no_mm((unsigned long) p->addr) || + /* Ensure the address is in a text area, and find a module if exists. */ + *probed_mod = NULL; + if (!core_kernel_text((unsigned long) p->addr)) { + *probed_mod = __module_text_address((unsigned long) p->addr); + if (!(*probed_mod)) { + ret = -EINVAL; + goto out; + } + } + /* Ensure it is not in reserved area. */ + if (in_gate_area_no_mm((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || @@ -1577,8 +1584,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if are we probing a module */ - *probed_mod = __module_text_address((unsigned long) p->addr); + /* Get module refcount and reject __init functions for loaded modules. 
*/ if (*probed_mod) { /* * We must hold a refcount of the probed module while updating diff --git a/kernel/kthread.c b/kernel/kthread.c index e319a1b625..4cc6897b7c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -268,6 +268,21 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); +/** + * kthread_exit - Cause the current kthread return @result to kthread_stop(). + * @result: The integer value to return to kthread_stop(). + * + * While kthread_exit can be called directly, it exists so that + * functions which do some additional work in non-modular code such as + * module_put_and_kthread_exit can be implemented. + * + * Does not return. + */ +void __noreturn kthread_exit(long result) +{ + do_exit(result); +} + static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -285,13 +300,13 @@ static int kthread(void *_create) done = xchg(&create->done, NULL); if (!done) { kfree(create); - do_exit(-EINTR); + kthread_exit(-EINTR); } if (!self) { create->result = ERR_PTR(-ENOMEM); complete(done); - do_exit(-ENOMEM); + kthread_exit(-ENOMEM); } self->threadfn = threadfn; @@ -318,7 +333,7 @@ static int kthread(void *_create) __kthread_parkme(self); ret = threadfn(data); } - do_exit(ret); + kthread_exit(ret); } /* called from kernel_clone() to get node information for about to be created task */ @@ -628,7 +643,7 @@ EXPORT_SYMBOL_GPL(kthread_park); * instead of calling wake_up_process(): the thread will exit without * calling threadfn(). * - * If threadfn() may call do_exit() itself, the caller must ensure + * If threadfn() may call kthread_exit() itself, the caller must ensure * task_struct can't go away. * * Returns the result of threadfn(), or %-EINTR if wake_up_process() diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index f0287a16b4..4a38d32b89 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -133,14 +133,19 @@ * the owner value concurrently without lock. 
Read from owner, however, * may not need READ_ONCE() as long as the pointer value is only used * for comparison and isn't being dereferenced. + * + * Both rwsem_{set,clear}_owner() functions should be in the same + * preempt disable section as the atomic op that changes sem->count. */ static inline void rwsem_set_owner(struct rw_semaphore *sem) { + lockdep_assert_preemption_disabled(); atomic_long_set(&sem->owner, (long)current); } static inline void rwsem_clear_owner(struct rw_semaphore *sem) { + lockdep_assert_preemption_disabled(); atomic_long_set(&sem->owner, 0); } @@ -251,13 +256,16 @@ static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp) static inline bool rwsem_write_trylock(struct rw_semaphore *sem) { long tmp = RWSEM_UNLOCKED_VALUE; + bool ret = false; + preempt_disable(); if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) { rwsem_set_owner(sem); - return true; + ret = true; } - return false; + preempt_enable(); + return ret; } /* @@ -1341,8 +1349,10 @@ static inline void __up_write(struct rw_semaphore *sem) DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) && !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem); + preempt_disable(); rwsem_clear_owner(sem); tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); + preempt_enable(); if (unlikely(tmp & RWSEM_FLAG_WAITERS)) rwsem_wake(sem); } diff --git a/kernel/module.c b/kernel/module.c index 4d49c32af5..2226b591b5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -335,14 +335,14 @@ static inline void add_taint_module(struct module *mod, unsigned flag, /* * A thread that wants to hold a reference to a module only while it - * is running can call this to safely exit. nfsd and lockd use this. + * is running can call this to safely exit. 
*/ -void __noreturn __module_put_and_exit(struct module *mod, long code) +void __noreturn __module_put_and_kthread_exit(struct module *mod, long code) { module_put(mod); - do_exit(code); + kthread_exit(code); } -EXPORT_SYMBOL(__module_put_and_exit); +EXPORT_SYMBOL(__module_put_and_kthread_exit); /* Find a module section: 0 means not found. */ static unsigned int find_sec(const struct load_info *info, const char *name) diff --git a/kernel/panic.c b/kernel/panic.c index 47933d4c76..3b14c6d123 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -425,6 +425,14 @@ void panic(const char *fmt, ...) /* Do not scroll important messages printed above */ suppress_printk = 1; + + /* + * The final messages may not have been printed if in a context that + * defers printing (such as NMI) and irq_work is not available. + * Explicitly flush the kernel log buffer one last time. + */ + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + local_irq_enable(); for (i = 0; ; i += PANIC_TIMER_STEP) { touch_softlockup_watchdog(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 13d905dd32..5d617639e8 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -187,6 +187,7 @@ static int __init mem_sleep_default_setup(char *str) if (mem_sleep_labels[state] && !strcmp(str, mem_sleep_labels[state])) { mem_sleep_default = state; + mem_sleep_current = state; break; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8b110b245d..323931ff61 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1908,6 +1908,12 @@ static int console_trylock_spinning(void) */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + /* + * Update @console_may_schedule for trylock because the previous + * owner may have been schedulable. 
+ */ + console_may_schedule = 0; + return 1; } @@ -2879,6 +2885,21 @@ static int __init keep_bootcon_setup(char *str) early_param("keep_bootcon", keep_bootcon_setup); +static int console_call_setup(struct console *newcon, char *options) +{ + int err; + + if (!newcon->setup) + return 0; + + /* Synchronize with possible boot console. */ + console_lock(); + err = newcon->setup(newcon, options); + console_unlock(); + + return err; +} + /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected @@ -2888,7 +2909,8 @@ early_param("keep_bootcon", keep_bootcon_setup); * Care need to be taken with consoles that are statically * enabled such as netconsole */ -static int try_enable_new_console(struct console *newcon, bool user_specified) +static int try_enable_preferred_console(struct console *newcon, + bool user_specified) { struct console_cmdline *c; int i, err; @@ -2913,8 +2935,8 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) if (_braille_register_console(newcon, c)) return 0; - if (newcon->setup && - (err = newcon->setup(newcon, c->options)) != 0) + err = console_call_setup(newcon, c->options); + if (err) return err; } newcon->flags |= CON_ENABLED; @@ -2936,6 +2958,23 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) return -ENOENT; } +/* Try to enable the console unconditionally */ +static void try_enable_default_console(struct console *newcon) +{ + if (newcon->index < 0) + newcon->index = 0; + + if (console_call_setup(newcon, NULL) != 0) + return; + + newcon->flags |= CON_ENABLED; + + if (newcon->device) { + newcon->flags |= CON_CONSDEV; + has_preferred_console = true; + } +} + /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to @@ -2991,25 +3030,15 @@ void register_console(struct console *newcon) * didn't select a console we take the first one * that registers 
here. */ - if (!has_preferred_console) { - if (newcon->index < 0) - newcon->index = 0; - if (newcon->setup == NULL || - newcon->setup(newcon, NULL) == 0) { - newcon->flags |= CON_ENABLED; - if (newcon->device) { - newcon->flags |= CON_CONSDEV; - has_preferred_console = true; - } - } - } + if (!has_preferred_console) + try_enable_default_console(newcon); /* See if this console matches one we selected on the command line */ - err = try_enable_new_console(newcon, true); + err = try_enable_preferred_console(newcon, true); /* If not, try to match against the platform default(s) */ if (err == -ENOENT) - err = try_enable_new_console(newcon, false); + err = try_enable_preferred_console(newcon, false); /* printk() messages are not printed to the Braille console. */ if (err || newcon->flags & CON_BRL) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b8ea9116..b43da6201b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10828,7 +10828,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of, { struct task_group *tg = css_tg(of_css(of)); u64 period = tg_get_cfs_period(tg); - u64 burst = tg_get_cfs_burst(tg); + u64 burst = tg->cfs_bandwidth.burst; u64 quota; int ret; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4a1393405a..94fcd585eb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5746,21 +5746,41 @@ static inline unsigned long cpu_util(int cpu); static inline bool cpu_overutilized(int cpu) { - unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); - unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); + unsigned long rq_util_min, rq_util_max; + + if (!sched_energy_enabled()) + return false; + + rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); + rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); return !util_fits_cpu(cpu_util(cpu), rq_util_min, rq_util_max, cpu); } -static inline void update_overutilized_status(struct rq *rq) +static inline void set_rd_overutilized_status(struct 
root_domain *rd, + unsigned int status) { - if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) { - WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); - trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED); - } + if (!sched_energy_enabled()) + return; + + WRITE_ONCE(rd->overutilized, status); + trace_sched_overutilized_tp(rd, !!status); +} + +static inline void check_update_overutilized_status(struct rq *rq) +{ + /* + * overutilized field is used for load balancing decisions only + * if energy aware scheduler is being used + */ + if (!sched_energy_enabled()) + return; + + if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) + set_rd_overutilized_status(rq->rd, SG_OVERUTILIZED); } #else -static inline void update_overutilized_status(struct rq *rq) { } +static inline void check_update_overutilized_status(struct rq *rq) { } #endif /* Runqueue only has SCHED_IDLE tasks enqueued */ @@ -5868,7 +5888,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * and the following generally works well enough in practice. 
*/ if (!task_new) - update_overutilized_status(rq); + check_update_overutilized_status(rq); enqueue_throttle: if (cfs_bandwidth_used()) { @@ -9577,19 +9597,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd env->fbq_type = fbq_classify_group(&sds->busiest_stat); if (!env->sd->parent) { - struct root_domain *rd = env->dst_rq->rd; - /* update overload indicator if we are at root domain */ - WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD); + WRITE_ONCE(env->dst_rq->rd->overload, sg_status & SG_OVERLOAD); /* Update over-utilization (tipping point, U >= 0) indicator */ - WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED); - trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED); + set_rd_overutilized_status(env->dst_rq->rd, + sg_status & SG_OVERUTILIZED); } else if (sg_status & SG_OVERUTILIZED) { - struct root_domain *rd = env->dst_rq->rd; - - WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); - trace_sched_overutilized_tp(rd, SG_OVERUTILIZED); + set_rd_overutilized_status(env->dst_rq->rd, SG_OVERUTILIZED); } update_idle_cpu_scan(env, sum_util); @@ -11460,7 +11475,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) task_tick_numa(rq, curr); update_misfit_status(curr, rq); - update_overutilized_status(task_rq(curr)); + check_update_overutilized_status(task_rq(curr)); task_tick_core(rq, curr); } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 4e8698e62f..8c82ca3aa6 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1405,7 +1405,7 @@ static void set_domain_attribute(struct sched_domain *sd, } else request = attr->relax_domain_level; - if (sd->level > request) { + if (sd->level >= request) { /* Turn off idle balance on this domain: */ sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); } diff --git a/kernel/softirq.c b/kernel/softirq.c index 41f470929e..dc60f0c66a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -512,7 +512,7 @@ static inline bool 
lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif -asmlinkage __visible void __softirq_entry __do_softirq(void) +static void handle_softirqs(bool ksirqd) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; @@ -567,8 +567,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending >>= softirq_bit; } - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && - __this_cpu_read(ksoftirqd) == current) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd) rcu_softirq_qs(); local_irq_disable(); @@ -588,6 +587,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) current_restore_flags(old_flags, PF_MEMALLOC); } +asmlinkage __visible void __softirq_entry __do_softirq(void) +{ + handle_softirqs(false); +} + /** * irq_enter_rcu - Enter an interrupt context with RCU watching */ @@ -918,7 +922,7 @@ static void run_ksoftirqd(unsigned int cpu) * We can safely run softirq on inline stack, as we are not deep * in the task stack here. */ - __do_softirq(); + handle_softirqs(true); ksoftirqd_run_end(); cond_resched(); return; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9dd2a39cb3..d4ce3ebe2c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1030,7 +1030,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option /* * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, - * otherwise del_timer_sync() can't detect that the timer's + * otherwise timer_delete_sync() can't detect that the timer's * handler yet has not finished. This also guarantees that the * timer is serialized wrt itself. 
*/ @@ -1068,14 +1068,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option } /** - * mod_timer_pending - modify a pending timer's timeout - * @timer: the pending timer to be modified - * @expires: new timeout in jiffies + * mod_timer_pending - Modify a pending timer's timeout + * @timer: The pending timer to be modified + * @expires: New absolute timeout in jiffies * - * mod_timer_pending() is the same for pending timers as mod_timer(), - * but will not re-activate and modify already deleted timers. + * mod_timer_pending() is the same for pending timers as mod_timer(), but + * will not activate inactive timers. * - * It is useful for unserialized use of timers. + * Return: + * * %0 - The timer was inactive and not modified + * * %1 - The timer was active and requeued to expire at @expires */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { @@ -1084,24 +1086,27 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(mod_timer_pending); /** - * mod_timer - modify a timer's timeout - * @timer: the timer to be modified - * @expires: new timeout in jiffies - * - * mod_timer() is a more efficient way to update the expire field of an - * active timer (if the timer is inactive it will be activated) + * mod_timer - Modify a timer's timeout + * @timer: The timer to be modified + * @expires: New absolute timeout in jiffies * * mod_timer(timer, expires) is equivalent to: * * del_timer(timer); timer->expires = expires; add_timer(timer); * + * mod_timer() is more efficient than the above open coded sequence. In + * case that the timer is inactive, the del_timer() part is a NOP. The + * timer is in any case activated with the new expiry time @expires. + * * Note that if there are multiple unserialized concurrent users of the * same timer, then mod_timer() is the only safe way to modify the timeout, * since add_timer() cannot modify an already running timer. 
* - * The function returns whether it has modified a pending timer or not. - * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an - * active timer returns 1.) + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires did + * not change the effective expiry time */ int mod_timer(struct timer_list *timer, unsigned long expires) { @@ -1112,11 +1117,18 @@ EXPORT_SYMBOL(mod_timer); /** * timer_reduce - Modify a timer's timeout if it would reduce the timeout * @timer: The timer to be modified - * @expires: New timeout in jiffies + * @expires: New absolute timeout in jiffies * * timer_reduce() is very similar to mod_timer(), except that it will only - * modify a running timer if that would reduce the expiration time (it will - * start a timer that isn't running). + * modify an enqueued timer if that would reduce the expiration time. If + * @timer is not enqueued it starts the timer. + * + * Return: + * * %0 - The timer was inactive and started + * * %1 - The timer was active and requeued to expire at @expires or + * the timer was active and not modified because @expires + * did not change the effective expiry time such that the + * timer would expire earlier than already scheduled */ int timer_reduce(struct timer_list *timer, unsigned long expires) { @@ -1125,18 +1137,21 @@ int timer_reduce(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(timer_reduce); /** - * add_timer - start a timer - * @timer: the timer to be added + * add_timer - Start a timer + * @timer: The timer to be started * - * The kernel will do a ->function(@timer) callback from the - * timer interrupt at the ->expires point in the future. The - * current time is 'jiffies'. + * Start @timer to expire at @timer->expires in the future. @timer->expires + * is the absolute expiry time measured in 'jiffies'. 
When the timer expires + * timer->function(timer) will be invoked from soft interrupt context. * - * The timer's ->expires, ->function fields must be set prior calling this - * function. + * The @timer->expires and @timer->function fields must be set prior + * to calling this function. * - * Timers with an ->expires field in the past will be executed in the next - * timer tick. + * If @timer->expires is already in the past @timer will be queued to + * expire at the next timer tick. + * + * This can only operate on an inactive timer. Attempts to invoke this on + * an active timer are rejected with a warning. */ void add_timer(struct timer_list *timer) { @@ -1146,11 +1161,13 @@ void add_timer(struct timer_list *timer) EXPORT_SYMBOL(add_timer); /** - * add_timer_on - start a timer on a particular CPU - * @timer: the timer to be added - * @cpu: the CPU to start it on + * add_timer_on - Start a timer on a particular CPU + * @timer: The timer to be started + * @cpu: The CPU to start it on + * + * Same as add_timer() except that it starts the timer on the given CPU. * - * This is not very scalable on SMP. Double adds are not possible. + * See add_timer() for further details. */ void add_timer_on(struct timer_list *timer, int cpu) { @@ -1185,15 +1202,18 @@ void add_timer_on(struct timer_list *timer, int cpu) EXPORT_SYMBOL_GPL(add_timer_on); /** - * del_timer - deactivate a timer. - * @timer: the timer to be deactivated - * - * del_timer() deactivates a timer - this works on both active and inactive - * timers. - * - * The function returns whether it has deactivated a pending timer or not. - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an - * active timer returns 1.) + * del_timer - Deactivate a timer. 
+ * @timer: The timer to be deactivated + * + * The function only deactivates a pending timer, but contrary to + * timer_delete_sync() it does not take into account whether the timer's + * callback function is concurrently executed on a different CPU or not. + * It neither prevents rearming of the timer. If @timer can be rearmed + * concurrently then the return value of this function is meaningless. + * + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ int del_timer(struct timer_list *timer) { @@ -1215,10 +1235,19 @@ EXPORT_SYMBOL(del_timer); /** * try_to_del_timer_sync - Try to deactivate a timer - * @timer: timer to delete + * @timer: Timer to deactivate + * + * This function tries to deactivate a timer. On success the timer is not + * queued and the timer callback function is not running on any CPU. + * + * This function does not guarantee that the timer cannot be rearmed right + * after dropping the base lock. That needs to be prevented by the calling + * code if necessary. * - * This function tries to deactivate a timer. Upon successful (ret >= 0) - * exit the timer is not queued and the handler is not running on any CPU. + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated + * * %-1 - The timer callback function is running on a different CPU */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -1312,25 +1341,20 @@ static inline void timer_sync_wait_running(struct timer_base *base) { } static inline void del_timer_wait_running(struct timer_list *timer) { } #endif -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) /** - * del_timer_sync - deactivate a timer and wait for the handler to finish. - * @timer: the timer to be deactivated - * - * This function only differs from del_timer() on SMP: besides deactivating - * the timer it also makes sure the handler has finished executing on other - * CPUs. 
+ * timer_delete_sync - Deactivate a timer and wait for the handler to finish. + * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts unless the timer is an irqsafe one. The caller must - * not hold locks which would prevent completion of the timer's - * handler. The timer's handler must not call add_timer_on(). Upon exit the - * timer is not queued and the handler is not running on any CPU. + * not hold locks which would prevent completion of the timer's callback + * function. The timer's handler must not call add_timer_on(). Upon exit + * the timer is not queued and the handler is not running on any CPU. * - * Note: For !irqsafe timers, you must not hold locks that are held in - * interrupt context while calling this function. Even if the lock has - * nothing to do with the timer in question. Here's why:: + * For !irqsafe timers, the caller must not hold locks that are held in + * interrupt context. Even if the lock has nothing to do with the timer in + * question. Here's why:: * * CPU0 CPU1 * ---- ---- @@ -1340,16 +1364,23 @@ static inline void del_timer_wait_running(struct timer_list *timer) { } * spin_lock_irq(somelock); * * spin_lock(somelock); - * del_timer_sync(mytimer); + * timer_delete_sync(mytimer); * while (base->running_timer == mytimer); * - * Now del_timer_sync() will never return and never release somelock. - * The interrupt on the other CPU is waiting to grab somelock but - * it has interrupted the softirq that CPU0 is waiting to finish. + * Now timer_delete_sync() will never return and never release somelock. + * The interrupt on the other CPU is waiting to grab somelock but it has + * interrupted the softirq that CPU0 is waiting to finish. + * + * This function cannot guarantee that the timer is not rearmed again by + * some concurrent or preempting code, right after it dropped the base + * lock. 
If there is the possibility of a concurrent rearm then the return + * value of the function is meaningless. * - * The function returns whether it has deactivated a pending timer or not. + * Return: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ -int del_timer_sync(struct timer_list *timer) +int timer_delete_sync(struct timer_list *timer) { int ret; @@ -1389,8 +1420,7 @@ int del_timer_sync(struct timer_list *timer) return ret; } -EXPORT_SYMBOL(del_timer_sync); -#endif +EXPORT_SYMBOL(timer_delete_sync); static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), @@ -1412,8 +1442,8 @@ static void call_timer_fn(struct timer_list *timer, #endif /* * Couple the lock chain with the lock chain at - * del_timer_sync() by acquiring the lock_map around the fn() - * call here and in del_timer_sync(). + * timer_delete_sync() by acquiring the lock_map around the fn() + * call here and in timer_delete_sync(). */ lock_map_acquire(&lockdep_map); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d9bed77f96..f9f0c198cb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -406,7 +406,6 @@ struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; - long wait_index; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; @@ -874,8 +873,19 @@ static void rb_wake_up_waiters(struct irq_work *work) wake_up_all(&rbwork->waiters); if (rbwork->full_waiters_pending || rbwork->wakeup_full) { + /* Only cpu_buffer sets the above flags */ + struct ring_buffer_per_cpu *cpu_buffer = + container_of(rbwork, struct ring_buffer_per_cpu, irq_work); + + /* Called from interrupt context */ + raw_spin_lock(&cpu_buffer->reader_lock); rbwork->wakeup_full = false; rbwork->full_waiters_pending = false; + + /* Waking up all waiters, they will reset the shortest full */ + cpu_buffer->shortest_full = 0; + raw_spin_unlock(&cpu_buffer->reader_lock); 
+ wake_up_all(&rbwork->full_waiters); } } @@ -915,14 +925,95 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) rbwork = &cpu_buffer->irq_work; } - rbwork->wait_index++; - /* make sure the waiters see the new index */ - smp_wmb(); - /* This can be called in any context */ irq_work_queue(&rbwork->work); } +static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer; + bool ret = false; + + /* Reads of all CPUs always waits for any data */ + if (cpu == RING_BUFFER_ALL_CPUS) + return !ring_buffer_empty(buffer); + + cpu_buffer = buffer->buffers[cpu]; + + if (!ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; + + if (!full) + return true; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + ret = !pagebusy && full_hit(buffer, cpu, full); + + if (!ret && (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full)) { + cpu_buffer->shortest_full = full; + } + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + } + return ret; +} + +static inline bool +rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, + int cpu, int full, ring_buffer_cond_fn cond, void *data) +{ + if (rb_watermark_hit(buffer, cpu, full)) + return true; + + if (cond(data)) + return true; + + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. 
+ * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + if (full) + rbwork->full_waiters_pending = true; + else + rbwork->waiters_pending = true; + + return false; +} + +/* + * The default wait condition for ring_buffer_wait() is to just to exit the + * wait loop the first time it is woken up. + */ +static bool rb_wait_once(void *data) +{ + long *once = data; + + /* wait_event() actually calls this twice before scheduling*/ + if (*once > 1) + return true; + + (*once)++; + return false; +} + /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on @@ -936,101 +1027,39 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer; - DEFINE_WAIT(wait); - struct rb_irq_work *work; - long wait_index; + struct wait_queue_head *waitq; + ring_buffer_cond_fn cond; + struct rb_irq_work *rbwork; + void *data; + long once = 0; int ret = 0; + cond = rb_wait_once; + data = &once; + /* * Depending on what the caller is waiting for, either any * data in any cpu buffer, or a specific buffer, put the * caller on the appropriate wait queue. 
*/ if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; /* Full only makes sense on per cpu reads */ full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -ENODEV; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; - } - - wait_index = READ_ONCE(work->wait_index); - - while (true) { - if (full) - prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); - else - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. 
- */ - if (full) - work->full_waiters_pending = true; - else - work->waiters_pending = true; - - if (signal_pending(current)) { - ret = -EINTR; - break; - } - - if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) - break; - - if (cpu != RING_BUFFER_ALL_CPUS && - !ring_buffer_empty_cpu(buffer, cpu)) { - unsigned long flags; - bool pagebusy; - bool done; - - if (!full) - break; - - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - done = !pagebusy && full_hit(buffer, cpu, full); - - if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full > full) - cpu_buffer->shortest_full = full; - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (done) - break; - } - - schedule(); - - /* Make sure to see the new wait index */ - smp_rmb(); - if (wait_index != work->wait_index) - break; + rbwork = &cpu_buffer->irq_work; } if (full) - finish_wait(&work->full_waiters, &wait); + waitq = &rbwork->full_waiters; else - finish_wait(&work->waiters, &wait); + waitq = &rbwork->waiters; + + ret = wait_event_interruptible((*waitq), + rb_wait_cond(rbwork, buffer, cpu, full, cond, data)); return ret; } @@ -1054,30 +1083,51 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; - struct rb_irq_work *work; + struct rb_irq_work *rbwork; if (cpu == RING_BUFFER_ALL_CPUS) { - work = &buffer->irq_work; + rbwork = &buffer->irq_work; full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; - work = &cpu_buffer->irq_work; + rbwork = &cpu_buffer->irq_work; } if (full) { - poll_wait(filp, &work->full_waiters, poll_table); - work->full_waiters_pending = true; + unsigned long flags; + + poll_wait(filp, &rbwork->full_waiters, poll_table); + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); if (!cpu_buffer->shortest_full || 
cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; - } else { - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (full_hit(buffer, cpu, full)) + return EPOLLIN | EPOLLRDNORM; + /* + * Only allow full_waiters_pending update to be seen after + * the shortest_full is set. If the writer sees the + * full_waiters_pending flag set, it will compare the + * amount in the ring buffer to shortest_full. If the amount + * in the ring buffer is greater than the shortest_full + * percent, it will call the irq_work handler to wake up + * this list. The irq_handler will reset shortest_full + * back to zero. That's done under the reader_lock, but + * the below smp_mb() makes sure that the update to + * full_waiters_pending doesn't leak up into the above. + */ + smp_mb(); + rbwork->full_waiters_pending = true; + return 0; } + poll_wait(filp, &rbwork->waiters, poll_table); + rbwork->waiters_pending = true; + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -1093,9 +1143,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, */ smp_mb(); - if (full) - return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; @@ -1462,7 +1509,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); - local_inc(&cpu_buffer->pages_touched); /* * Just make sure we have seen our old_write and synchronize * with any interrupts that come in. 
@@ -1499,8 +1545,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, */ local_set(&next_page->page->commit, 0); - /* Again, either we update tail_page or an interrupt does */ - (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); + /* Either we update tail_page or an interrupt does */ + if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page)) + local_inc(&cpu_buffer->pages_touched); } } @@ -1521,6 +1568,11 @@ static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, * * As a safety measure we check to make sure the data pages have not * been corrupted. + * + * Callers of this function need to guarantee that the list of pages doesn't get + * modified during the check. In particular, if it's possible that the function + * is invoked with concurrent readers which can swap in a new reader page then + * the caller should take cpu_buffer->reader_lock. */ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { @@ -2242,8 +2294,12 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, */ synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { + unsigned long flags; + cpu_buffer = buffer->buffers[cpu]; + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_check_pages(cpu_buffer); + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } atomic_dec(&buffer->record_disabled); } @@ -4343,7 +4399,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter) cpu_buffer = iter->cpu_buffer; reader = cpu_buffer->reader_page; head_page = cpu_buffer->head_page; - commit_page = cpu_buffer->commit_page; + commit_page = READ_ONCE(cpu_buffer->commit_page); commit_ts = commit_page->page->time_stamp; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 87ad97dbe8..ab56c8a61e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8228,6 +8228,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return size; } +static int tracing_buffers_flush(struct file *file, 
fl_owner_t id) +{ + struct ftrace_buffer_info *info = file->private_data; + struct trace_iterator *iter = &info->iter; + + iter->wait_index++; + /* Make sure the waiters see the new wait_index */ + smp_wmb(); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + + return 0; +} + static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; @@ -8239,12 +8253,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); - iter->wait_index++; - /* Make sure the waiters see the new wait_index */ - smp_wmb(); - - ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); - if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); @@ -8458,6 +8466,7 @@ static const struct file_operations tracing_buffers_fops = { .read = tracing_buffers_read, .poll = tracing_buffers_poll, .release = tracing_buffers_release, + .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, .unlocked_ioctl = tracing_buffers_ioctl, .llseek = no_llseek, diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 083f648e32..61e3a2620f 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -401,7 +401,8 @@ void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "perf buffer not large enough")) + "perf buffer not large enough, wanted %d, have %d", + size, PERF_MAX_TRACE_SIZE)) return NULL; *rctxp = rctx = perf_swevent_get_recursion_context(); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0a7348b90b..1f4f3096b9 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1645,6 +1645,7 @@ static int trace_format_open(struct inode *inode, struct file *file) return 0; } 
+#ifdef CONFIG_PERF_EVENTS static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -1659,6 +1660,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } +#endif static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, @@ -2104,10 +2106,12 @@ static const struct file_operations ftrace_event_format_fops = { .release = seq_release, }; +#ifdef CONFIG_PERF_EVENTS static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, .llseek = default_llseek, }; +#endif static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_file_tr, diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index dfdbcf1da2..106f981384 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1161,10 +1161,8 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { - int ret = tracing_alloc_snapshot_instance(file->tr); - - if (ret < 0) - return ret; + if (tracing_alloc_snapshot_instance(file->tr) != 0) + return 0; return register_trigger(glob, ops, data, file); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 64d6292cf6..2025b624fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -319,7 +319,7 @@ config DEBUG_INFO_DWARF5 endchoice # "DWARF version" config DEBUG_INFO_BTF - bool "Generate BTF typeinfo" + bool "Generate BTF type information" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST help @@ -331,7 +331,8 @@ config PAHOLE_HAS_SPLIT_BTF def_bool PAHOLE_VERSION >= 119 config DEBUG_INFO_BTF_MODULES - def_bool y + bool "Generate BTF type information for kernel modules" + default y depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF help Generate compact split BTF type 
information for kernel modules. @@ -424,6 +425,7 @@ config SECTION_MISMATCH_WARN_ONLY config DEBUG_FORCE_FUNCTION_ALIGN_64B bool "Force all function address 64B aligned" depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC) + select FUNCTION_ALIGNMENT_64B help There are cases that a commit from one domain changes the function address alignment of other domains, and cause magic performance diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 2ca56c22a1..27a5a28c41 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -260,7 +260,11 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) } else { for (end = buf; *end && !isspace(*end); end++) ; - BUG_ON(end == buf); + if (end == buf) { + pr_err("parse err after word:%d=%s\n", nwords, + nwords ? words[nwords - 1] : ""); + return -EINVAL; + } } /* `buf' is start of word, `end' is one past its end */ diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index ce39ce9f35..2829ddb0e3 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -170,8 +170,8 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *p) if (addr >= start && addr < start + IO_SPACE_LIMIT) return; - iounmap(p); #endif + iounmap(p); } EXPORT_SYMBOL(pci_iounmap); diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 8662dc6cb5..e8b13b6202 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -39,7 +39,7 @@ static void test_next_pointer(struct kunit *test) ptr_addr = (unsigned long *)(p + s->offset); tmp = *ptr_addr; - p[s->offset] = 0x12; + p[s->offset] = ~p[s->offset]; /* * Expecting three errors. diff --git a/lib/stackdepot.c b/lib/stackdepot.c index e90f0f19e7..d31f23570a 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -294,10 +294,10 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic - * contexts and I/O. + * contexts, I/O, nolockdep. 
*/ alloc_flags &= ~GFP_ZONEMASK; - alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP); alloc_flags |= __GFP_NOWARN; page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); if (page) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 89f444cabd..ffedc34714 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -403,7 +403,8 @@ static void kmalloc_oob_16(struct kunit *test) /* This test is specifically crafted for the generic mode. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + /* RELOC_HIDE to prevent gcc from warning about short alloc */ + ptr1 = RELOC_HIDE(kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL), 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); @@ -500,7 +501,7 @@ static void kmalloc_oob_in_memset(struct kunit *test) kfree(ptr); } -static void kmalloc_memmove_invalid_size(struct kunit *test) +static void kmalloc_memmove_negative_size(struct kunit *test) { char *ptr; size_t size = 64; @@ -522,6 +523,21 @@ static void kmalloc_memmove_invalid_size(struct kunit *test) kfree(ptr); } +static void kmalloc_memmove_invalid_size(struct kunit *test) +{ + char *ptr; + size_t size = 64; + volatile size_t invalid_size = size; + + ptr = kmalloc(size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + memset((char *)ptr, 0, 64); + KUNIT_EXPECT_KASAN_FAIL(test, + memmove((char *)ptr, (char *)ptr + 4, invalid_size)); + kfree(ptr); +} + static void kmalloc_uaf(struct kunit *test) { char *ptr; @@ -1139,6 +1155,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmalloc_oob_memset_4), KUNIT_CASE(kmalloc_oob_memset_8), KUNIT_CASE(kmalloc_oob_memset_16), + KUNIT_CASE(kmalloc_memmove_negative_size), KUNIT_CASE(kmalloc_memmove_invalid_size), KUNIT_CASE(kmalloc_uaf), KUNIT_CASE(kmalloc_uaf_memset), diff --git a/mm/compaction.c b/mm/compaction.c index 89517ad5d6..3ec4a2e399 100644 --- a/mm/compaction.c +++ 
b/mm/compaction.c @@ -2582,16 +2582,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, struct page **capture) { - int may_perform_io = gfp_mask & __GFP_IO; struct zoneref *z; struct zone *zone; enum compact_result rc = COMPACT_SKIPPED; - /* - * Check if the GFP flags allow compaction - GFP_NOIO is really - * tricky context because the migration might require IO - */ - if (!may_perform_io) + if (!gfp_compaction_allowed(gfp_mask)) return COMPACT_SKIPPED; trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); diff --git a/mm/memory.c b/mm/memory.c index 6044d9a4bc..99d15abe4a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5086,6 +5086,10 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = *ptep; + /* Never return PFNs of anon folios in COW mappings. */ + if (vm_normal_page(vma, address, pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; diff --git a/mm/memtest.c b/mm/memtest.c index f53ace709c..d407373f22 100644 --- a/mm/memtest.c +++ b/mm/memtest.c @@ -46,10 +46,10 @@ static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size last_bad = 0; for (p = start; p < end; p++) - *p = pattern; + WRITE_ONCE(*p, pattern); for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) + if (READ_ONCE(*p) == pattern) continue; if (start_phys_aligned == last_bad + incr) { last_bad += incr; diff --git a/mm/migrate.c b/mm/migrate.c index c7d5566623..c37af50f31 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -424,8 +424,12 @@ int migrate_page_move_mapping(struct address_space *mapping, if (PageSwapBacked(page)) { __SetPageSwapBacked(newpage); if (PageSwapCache(page)) { + int i; + SetPageSwapCache(newpage); - set_page_private(newpage, page_private(page)); + for (i = 0; i < (1 << compound_order(page)); i++) + set_page_private(newpage + i, + page_private(page + i)); } } else { 
VM_BUG_ON_PAGE(PageSwapCache(page), page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ec3333a1f7..264efa022f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4903,6 +4903,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; + bool can_compact = gfp_compaction_allowed(gfp_mask); const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER; struct page *page = NULL; unsigned int alloc_flags; @@ -4968,7 +4969,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * Don't try this for allocations that are allowed to ignore * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen. */ - if (can_direct_reclaim && + if (can_direct_reclaim && can_compact && (costly_order || (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) && !gfp_pfmemalloc_allowed(gfp_mask)) { @@ -5065,9 +5066,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, /* * Do not retry costly high order allocations unless they are - * __GFP_RETRY_MAYFAIL + * __GFP_RETRY_MAYFAIL and we can compact */ - if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) + if (costly_order && (!can_compact || + !(gfp_mask & __GFP_RETRY_MAYFAIL))) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, @@ -5080,7 +5082,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * implementation of the compaction depends on the sufficient amount * of free memory (see __compaction_suitable) */ - if (did_some_progress > 0 && + if (did_some_progress > 0 && can_compact && should_compact_retry(ac, order, alloc_flags, compact_result, &compact_priority, &compaction_retries)) diff --git a/mm/swapfile.c b/mm/swapfile.c index b7e1620ade..fec3e736a1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1236,6 +1236,18 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } /* + * When we get a swap entry, if there aren't some other ways to + * prevent swapoff, 
such as the folio in swap cache is locked, page + * table lock is held, etc., the swap entry may become invalid because + * of swapoff. Then, we need to enclose all swap related functions + * with get_swap_device() and put_swap_device(), unless the swap + * functions call get/put_swap_device() by themselves. + * + * Note that when only holding the PTL, swapoff might succeed immediately + * after freeing a swap entry. Therefore, immediately after + * __swap_entry_free(), the swap info might become stale and should not + * be touched without a prior get_swap_device(). + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1244,9 +1256,8 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way - * to prevent swapoff, such as page lock, page table lock, etc. The - * caller must be prepared for that. For example, the following - * situation is possible. + * to prevent swapoff. The caller must be prepared for that. For + * example, the following situation is possible. 
* * CPU1 CPU2 * do_swap_page() @@ -1762,13 +1773,19 @@ int free_swap_and_cache(swp_entry_t entry) if (non_swap_entry(entry)) return 1; - p = _swap_info_get(entry); + p = get_swap_device(entry); if (p) { + if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) { + put_swap_device(p); + return 0; + } + count = __swap_entry_free(p, entry); if (count == SWAP_HAS_CACHE && !swap_page_trans_huge_swapped(p, entry)) __try_to_reclaim_swap(p, swp_offset(entry), TTRS_UNMAPPED | TTRS_FULL); + put_swap_device(p); } return p != NULL; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 201acea818..342a78a865 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2834,7 +2834,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + if (gfp_compaction_allowed(sc->gfp_mask) && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || sc->priority < DEF_PRIORITY - 2)) return true; @@ -3167,6 +3167,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) unsigned long watermark; enum compact_result suitable; + if (!gfp_compaction_allowed(sc->gfp_mask)) + return false; + suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); if (suitable == COMPACT_SUCCESS) /* Allocation should succeed already. Don't reclaim. 
*/ diff --git a/net/9p/client.c b/net/9p/client.c index ead458486f..bf29462c91 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -233,6 +233,8 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc, if (!fc->sdata) return -ENOMEM; fc->capacity = alloc_msize; + fc->id = 0; + fc->tag = P9_NOTAG; return 0; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 1e1cf0e8a1..660a5594a6 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) spin_lock_bh(&bat_priv->tt.commit_lock); - while (true) { + while (timeout) { table_size = batadv_tt_local_table_transmit_size(bat_priv); if (packet_size_max >= table_size) break; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 72f47b3727..a796d72c7d 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -535,7 +535,7 @@ static int bnep_session(void *arg) up_write(&bnep_session_sem); free_netdev(dev); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 83eb84e8e6..90d130588a 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -323,7 +323,7 @@ static int cmtp_session(void *arg) up_write(&cmtp_session_sem); kfree(session); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index d112b2bc37..2f9b2165ec 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -216,10 +216,12 @@ static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val > hdev->conn_info_max_age) + hci_dev_lock(hdev); + if (val == 0 || val > hdev->conn_info_max_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_min_age = val; hci_dev_unlock(hdev); 
@@ -244,10 +246,12 @@ static int conn_info_max_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val < hdev->conn_info_min_age) + hci_dev_lock(hdev); + if (val == 0 || val < hdev->conn_info_min_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_max_age = val; hci_dev_unlock(hdev); @@ -565,10 +569,12 @@ static int sniff_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_min_interval = val; hci_dev_unlock(hdev); @@ -593,10 +599,12 @@ static int sniff_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_max_interval = val; hci_dev_unlock(hdev); @@ -848,10 +856,12 @@ static int conn_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_min_interval = val; hci_dev_unlock(hdev); @@ -876,10 +886,12 @@ static int conn_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_max_interval = val; hci_dev_unlock(hdev); @@ -988,10 +1000,12 @@ static int adv_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - 
if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); @@ -1016,10 +1030,12 @@ static int adv_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_max_interval = val; hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0be37a5c1c..13d397493e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2729,6 +2729,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_ENCRYPT, &hdev->flags)) set_bit(HCI_CONN_ENCRYPT, &conn->flags); + /* "Link key request" completed ahead of "connect request" completes */ + if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && + ev->link_type == ACL_LINK) { + struct link_key *key; + struct hci_cp_read_enc_key_size cp; + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (key) { + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + + if (!(hdev->commands[20] & 0x10)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } else { + cp.handle = cpu_to_le16(conn->handle); + if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, + sizeof(cp), &cp)) { + bt_dev_err(hdev, "sending read key size failed"); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } + } + + hci_encrypt_cfm(conn, ev->status); + } + } + /* Get remote features */ if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c2db60ad0f..90392c8fe5 100644 --- a/net/bluetooth/hci_request.c +++ 
b/net/bluetooth/hci_request.c @@ -108,8 +108,10 @@ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; - if (skb) + if (skb) { + kfree_skb(hdev->req_skb); hdev->req_skb = skb_get(skb); + } wake_up_interruptible(&hdev->req_wait_q); } } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 021ab957a5..8ff45fb6f7 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg) l2cap_unregister_user(session->conn, &session->user); hidp_session_put(session); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 11bfc8737e..900b352975 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -435,6 +435,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); + if (!conn) + return; + mutex_lock(&conn->chan_lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. 
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 756523e540..3a2be1b4a5 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -456,7 +456,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; struct l2cap_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; u32 opt; BT_DBG("sk %p", sk); @@ -503,7 +504,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mode 0x%2.2x", chan->mode); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -553,7 +554,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9a8814d456..431e09cac1 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work) struct sock *sk; sco_conn_lock(conn); + if (!conn->hcon) { + sco_conn_unlock(conn); + return; + } sk = conn->sk; if (sk) sock_hold(sk); @@ -904,7 +908,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct sco_options opts; struct sco_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; BT_DBG("sk %p", sk); @@ -926,7 +931,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mtu %u", opts.mtu); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *)&opts, len)) err = -EFAULT; @@ -944,7 +949,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, 
cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *)&cinfo, len)) err = -EFAULT; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 011bd3c59d..1b66c27611 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -253,6 +253,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; const unsigned char *src = eth_hdr(skb)->h_source; + struct sk_buff *nskb; if (!should_deliver(p, skb)) return; @@ -261,12 +262,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) { + __skb_push(skb, ETH_HLEN); + nskb = pskb_copy(skb, GFP_ATOMIC); + __skb_pull(skb, ETH_HLEN); + if (!nskb) { DEV_STATS_INC(dev, tx_dropped); return; } + skb = nskb; + __skb_pull(skb, ETH_HLEN); if (!is_broadcast_ether_addr(addr)) memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 54bfcdf692..f3d49343f7 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) return netif_receive_skb(skb); } -static int br_pass_frame_up(struct sk_buff *skb) +static int br_pass_frame_up(struct sk_buff *skb, bool promisc) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); @@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb) br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), BR_MCAST_DIR_TX); + BR_INPUT_SKB_CB(skb)->promisc = promisc; + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL, skb, indev, NULL, br_netif_receive_skb); @@ -82,6 +84,7 @@ 
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb struct net_bridge_mcast *brmctx; struct net_bridge_vlan *vlan; struct net_bridge *br; + bool promisc; u16 vid = 0; u8 state; @@ -102,7 +105,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); - local_rcv = !!(br->dev->flags & IFF_PROMISC); + promisc = !!(br->dev->flags & IFF_PROMISC); + local_rcv = promisc; + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { /* by definition the broadcast is also a multicast address */ if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { @@ -165,7 +170,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb unsigned long now = jiffies; if (test_bit(BR_FDB_LOCAL, &dst->flags)) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, false); if (now != dst->used) dst->used = now; @@ -178,7 +183,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } if (local_rcv) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, promisc); out: return 0; @@ -350,6 +355,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } + BR_INPUT_SKB_CB(skb)->promisc = false; + /* The else clause should be hit when nf_hook(): * - returns < 0 (drop/error) * - returns = 0 (stolen/nf_queue) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8a114a5000..9981e0dfdd 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -584,11 +584,17 @@ static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; struct nf_conntrack *nfct = skb_nfct(skb); const struct nf_ct_hook *ct_hook; struct nf_conn *ct; int ret; + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + if (!nfct || skb->pkt_type == PACKET_HOST) 
return NF_ACCEPT; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e365cf82f0..a1a703b7d5 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -619,7 +619,7 @@ void br_ifinfo_notify(int event, const struct net_bridge *br, { u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; - return br_info_notify(event, br, port, filter); + br_info_notify(event, br, port, filter); } /* diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ff10ddeeb5..fe61d3b8d0 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -547,6 +547,7 @@ struct br_input_skb_cb { #endif u8 proxyarp_replied:1; u8 src_port_isolated:1; + u8 promisc:1; #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_filtered:1; #endif diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c0389199c0..2f3ea11785 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) struct ebt_table_info *newinfo; struct ebt_replace tmp; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1422,6 +1424,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; @@ -2351,6 +2355,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg, { struct compat_ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 83743e9593..fbdb1ad448 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -293,18 +293,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, static unsigned int 
nf_ct_bridge_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - enum ip_conntrack_info ctinfo; + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; + struct nf_conntrack *nfct = skb_nfct(skb); struct nf_conn *ct; - if (skb->pkt_type == PACKET_HOST) + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + + if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; /* nf_conntrack_confirm() cannot handle concurrent clones, * this happens for broad/multicast frames with e.g. macvlan on top * of the bridge device. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + ct = container_of(nfct, struct nf_conn, ct_general); + if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) return NF_ACCEPT; /* let inet prerouting call conntrack again */ diff --git a/net/core/dev.c b/net/core/dev.c index f80bc2ca88..e86ef1a164 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10549,8 +10549,9 @@ static void netdev_wait_allrefs(struct net_device *dev) rebroadcast_time = jiffies; } + rcu_barrier(); + if (!wait) { - rcu_barrier(); wait = WAIT_REFS_MIN_MSECS; } else { msleep(wait); diff --git a/net/core/filter.c b/net/core/filter.c index 457d1a164a..47eb1bd47a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3987,37 +3987,75 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_master_redirect); -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) +static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, + struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + int err; + + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC; + + err = __xsk_map_redirect(fwd, xdp); + if (unlikely(err)) + goto err; + + 
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); + return 0; +err: + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); + return err; +} + +static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, + struct net_device *dev, + struct xdp_frame *xdpf, + struct bpf_prog *xdp_prog) +{ + enum bpf_map_type map_type = ri->map_type; + void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; + u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; + if (unlikely(!xdpf)) { + err = -EOVERFLOW; + goto err; + } + switch (map_type) { case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); - err = dev_map_enqueue_multi(xdp, dev, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + err = dev_map_enqueue_multi(xdpf, dev, map, + flags & BPF_F_EXCLUDE_INGRESS); } else { - err = dev_map_enqueue(fwd, xdp, dev); + err = dev_map_enqueue(fwd, xdpf, dev); } break; case BPF_MAP_TYPE_CPUMAP: - err = cpu_map_enqueue(fwd, xdp, dev); - break; - case BPF_MAP_TYPE_XSKMAP: - err = __xsk_map_redirect(fwd, xdp); + err = cpu_map_enqueue(fwd, xdpf, dev); break; case BPF_MAP_TYPE_UNSPEC: if (map_id == INT_MAX) { @@ -4026,7 +4064,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, err = -EINVAL; break; } - err = dev_xdp_enqueue(fwd, xdp, dev); + err = dev_xdp_enqueue(fwd, xdpf, dev); break; } fallthrough; @@ -4043,14 +4081,40 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return 
err; } + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + enum bpf_map_type map_type = ri->map_type; + + if (map_type == BPF_MAP_TYPE_XSKMAP) + return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); + + return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), + xdp_prog); +} EXPORT_SYMBOL_GPL(xdp_do_redirect); +int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, + struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + enum bpf_map_type map_type = ri->map_type; + + if (map_type == BPF_MAP_TYPE_XSKMAP) + return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); + + return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog); +} +EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); + static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, - void *fwd, - enum bpf_map_type map_type, u32 map_id) + struct bpf_prog *xdp_prog, void *fwd, + enum bpf_map_type map_type, u32 map_id, + u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; @@ -4060,11 +4124,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } @@ -4101,9 +4174,11 @@ int xdp_do_generic_redirect(struct net_device *dev, 
struct sk_buff *skb, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { @@ -4123,7 +4198,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, return 0; } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dcddc54d08..a209db33fa 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -68,12 +68,15 @@ DEFINE_COOKIE(net_cookie); static struct net_generic *net_alloc_generic(void) { + unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs); + unsigned int generic_size; struct net_generic *ng; - unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + + generic_size = offsetof(struct net_generic, ptr[gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) - ng->s.len = max_gen_ptrs; + ng->s.len = gen_ptrs; return ng; } @@ -1211,7 +1214,11 @@ static int register_pernet_operations(struct list_head *list, if (error < 0) return error; *ops->id = error; - max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); + /* This does not require READ_ONCE as writers already hold + * pernet_ops_rwsem. But WRITE_ONCE is needed to protect + * net_alloc_generic. 
+ */ + WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1)); } error = __register_pernet_operations(list, ops); if (error) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ef218e290d..d25632fbfa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2383,7 +2383,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { if (nla_type(attr) != IFLA_VF_VLAN_INFO || - nla_len(attr) < NLA_HDRLEN) { + nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) { return -EINVAL; } if (len >= MAX_VLAN_LIST_LEN) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3dbefce8d1..4ec8cfd357 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1583,11 +1583,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { - int headerlen = skb_headroom(skb); - unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + struct sk_buff *n; + unsigned int size; + int headerlen; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + headerlen = skb_headroom(skb); + size = skb_end_offset(skb) + skb->data_len; + n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -1899,12 +1905,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask, skb_alloc_rx_flag(skb), - NUMA_NO_NODE); - int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; + struct sk_buff *n; + int oldheadroom; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + oldheadroom = skb_headroom(skb); + n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (!n) return NULL; 
@@ -3976,8 +3987,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->count++; p->data_len += skb->len; - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; @@ -4425,8 +4437,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) } merge: - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9cd14212dc..ec8671ecca 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, msg_rx = sk_psock_peek_msg(psock); } out: - if (psock->work_state.skb && copied > 0) - schedule_work(&psock->work); return copied; } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); @@ -617,42 +615,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, static void sk_psock_skb_state(struct sk_psock *psock, struct sk_psock_work_state *state, - struct sk_buff *skb, int len, int off) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { - state->skb = skb; state->len = len; state->off = off; - } else { - sock_drop(psock->sk, skb); } spin_unlock_bh(&psock->ingress_lock); } static void sk_psock_backlog(struct work_struct *work) { - struct sk_psock *psock = container_of(work, struct sk_psock, work); + struct delayed_work *dwork = to_delayed_work(work); + struct sk_psock *psock = container_of(dwork, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; struct sk_buff *skb = NULL; + u32 len = 0, off = 0; bool 
ingress; - u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (unlikely(state->skb)) { - spin_lock_bh(&psock->ingress_lock); - skb = state->skb; + if (unlikely(state->len)) { len = state->len; off = state->off; - state->skb = NULL; - spin_unlock_bh(&psock->ingress_lock); } - if (skb) - goto start; - while ((skb = skb_dequeue(&psock->ingress_skb))) { + while ((skb = skb_peek(&psock->ingress_skb))) { len = skb->len; off = 0; if (skb_bpf_strparser(skb)) { @@ -661,7 +650,6 @@ static void sk_psock_backlog(struct work_struct *work) off = stm->offset; len = stm->full_len; } -start: ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); do { @@ -671,22 +659,28 @@ static void sk_psock_backlog(struct work_struct *work) len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - sk_psock_skb_state(psock, state, skb, - len, off); + sk_psock_skb_state(psock, state, len, off); + + /* Delay slightly to prioritize any + * other work that might be here. + */ + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + schedule_delayed_work(&psock->work, 1); goto end; } /* Hard errors break pipe and stop xmit. */ sk_psock_report_error(psock, ret ? 
-ret : EPIPE); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); - sock_drop(psock->sk, skb); goto end; } off += ret; len -= ret; } while (len); - if (!ingress) + skb = skb_dequeue(&psock->ingress_skb); + if (!ingress) { kfree_skb(skb); + } } end: mutex_unlock(&psock->work_mutex); @@ -727,7 +721,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) INIT_LIST_HEAD(&psock->link); spin_lock_init(&psock->link_lock); - INIT_WORK(&psock->work, sk_psock_backlog); + INIT_DELAYED_WORK(&psock->work, sk_psock_backlog); mutex_init(&psock->work_mutex); INIT_LIST_HEAD(&psock->ingress_msg); spin_lock_init(&psock->ingress_lock); @@ -779,11 +773,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } - kfree_skb(psock->work_state.skb); - /* We null the skb here to ensure that calls to sk_psock_backlog - * do not pick up the free'd skb. - */ - psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } @@ -802,7 +791,6 @@ void sk_psock_stop(struct sk_psock *psock) spin_lock_bh(&psock->ingress_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); sk_psock_cork_free(psock); - __sk_psock_zap_ingress(psock); spin_unlock_bh(&psock->ingress_lock); } @@ -816,7 +804,8 @@ static void sk_psock_destroy(struct work_struct *work) sk_psock_done_strp(psock); - cancel_work_sync(&psock->work); + cancel_delayed_work_sync(&psock->work); + __sk_psock_zap_ingress(psock); mutex_destroy(&psock->work_mutex); psock_progs_drop(&psock->progs); @@ -931,7 +920,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) } skb_queue_tail(&psock_other->ingress_skb, skb); - schedule_work(&psock_other->work); + schedule_delayed_work(&psock_other->work, 0); spin_unlock_bh(&psock_other->ingress_lock); return 0; } @@ -1011,7 +1000,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { 
skb_queue_tail(&psock->ingress_skb, skb); - schedule_work(&psock->work); + schedule_delayed_work(&psock->work, 0); err = 0; } spin_unlock_bh(&psock->ingress_lock); @@ -1042,7 +1031,7 @@ static void sk_psock_write_space(struct sock *sk) psock = sk_psock(sk); if (likely(psock)) { if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) - schedule_work(&psock->work); + schedule_delayed_work(&psock->work, 0); write_space = psock->saved_write_space; } rcu_read_unlock(); diff --git a/net/core/sock.c b/net/core/sock.c index 6f761f3c27..62e376f09f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -459,7 +459,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { + if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; @@ -511,7 +511,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, skb->dev = NULL; - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { + if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index ba6d5b38fb..2ded250ac0 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -413,6 +413,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock *sk; int err = 0; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + raw_spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) @@ -931,6 +934,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) struct bpf_shtab_elem *elem; int ret = -ENOENT; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); @@ -1571,9 +1577,10 @@ void sock_map_close(struct sock *sk, long 
timeout) rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); - cancel_work_sync(&psock->work); + cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); } + /* Make sure we do not recurse. This is a bug. * Leak the socket instead of crashing on a stack overflow. */ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 34763f575c..543834e312 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -386,6 +386,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port * +dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + + if (!ds->ops->preferred_default_local_cpu_port) + return NULL; + + cpu_dp = ds->ops->preferred_default_local_cpu_port(ds); + if (!cpu_dp) + return NULL; + + if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds)) + return NULL; + + return cpu_dp; +} + /* Perform initial assignment of CPU ports to user ports and DSA links in the * fabric, giving preference to CPU ports local to each switch. 
Default to * using the first CPU port in the switch tree if the port does not have a CPU @@ -393,12 +411,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) */ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) { - struct dsa_port *cpu_dp, *dp; + struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp; list_for_each_entry(cpu_dp, &dst->ports, list) { if (!dsa_port_is_cpu(cpu_dp)) continue; + preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds); + if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp) + continue; + list_for_each_entry(dp, &dst->ports, list) { /* Prefer a local CPU port */ if (dp->ds != cpu_dp->ds) @@ -1634,7 +1656,6 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch); void dsa_switch_shutdown(struct dsa_switch *ds) { struct net_device *master, *slave_dev; - LIST_HEAD(unregister_list); struct dsa_port *dp; mutex_lock(&dsa2_mutex); @@ -1655,25 +1676,13 @@ void dsa_switch_shutdown(struct dsa_switch *ds) slave_dev = dp->slave; netdev_upper_dev_unlink(master, slave_dev); - /* Just unlinking ourselves as uppers of the master is not - * sufficient. When the master net device unregisters, that will - * also call dev_close, which we will catch as NETDEV_GOING_DOWN - * and trigger a dev_close on our own devices (dsa_slave_close). - * In turn, that will call dev_mc_unsync on the master's net - * device. If the master is also a DSA switch port, this will - * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on - * its own master. Lockdep will complain about the fact that - * all cascaded masters have the same dsa_master_addr_list_lock_key, - * which it normally would not do if the cascaded masters would - * be in a proper upper/lower relationship, which we've just - * destroyed. - * To suppress the lockdep warnings, let's actually unregister - * the DSA slave interfaces too, to avoid the nonsensical - * multicast address list synchronization on shutdown. 
- */ - unregister_netdevice_queue(slave_dev, &unregister_list); } - unregister_netdevice_many(&unregister_list); + + /* Disconnect from further netdevice notifiers on the master, + * since netdev_uses_dsa() will now return false. + */ + dsa_switch_for_each_cpu_port(dp, ds) + dp->master->dsa_ptr = NULL; rtnl_unlock(); out: diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index a163f53569..aa5d234b63 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -489,10 +489,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, is_link_local = sja1105_is_link_local(skb); is_meta = sja1105_is_meta_frame(skb); - if (sja1105_skb_has_tag_8021q(skb)) { - /* Normal traffic path. */ - sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid); - } else if (is_link_local) { + if (is_link_local) { /* Management traffic path. Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of * the incl_srcpt options. @@ -506,14 +503,35 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, sja1105_meta_unpack(skb, &meta); source_port = meta.source_port; switch_id = meta.switch_id; - } else { + } + + /* Normal data plane traffic and link-local frames are tagged with + * a tag_8021q VLAN which we have to strip + */ + if (sja1105_skb_has_tag_8021q(skb)) { + int tmp_source_port = -1, tmp_switch_id = -1; + + sja1105_vlan_rcv(skb, &tmp_source_port, &tmp_switch_id, &vid); + /* Preserve the source information from the INCL_SRCPT option, + * if available. This allows us to not overwrite a valid source + * port and switch ID with zeroes when receiving link-local + * frames from a VLAN-aware bridged port (non-zero vid). + */ + if (source_port == -1) + source_port = tmp_source_port; + if (switch_id == -1) + switch_id = tmp_switch_id; + } else if (source_port == -1 && switch_id == -1) { + /* Packets with no source information have no chance of + * getting accepted, drop them straight away. 
+ */ return NULL; } - if (source_port == -1 || switch_id == -1) - skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); - else + if (source_port != -1 && switch_id != -1) skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + else + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 73fce94674..9ad4a15232 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -163,17 +163,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) { - if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { - if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } else { - skb->pkt_type = PACKET_OTHERHOST; - } - } + eth_skb_pkt_type(skb, dev); /* * Some variants of DSA tagging don't have an ethertype field diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7b749a9832..38b30f6790 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -92,6 +92,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -1035,6 +1036,8 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; struct net *net = dev_net(skb->dev); + struct inet6_dev *in6_dev; + struct in_device *in_dev; struct net_device *dev; char buff[IFNAMSIZ]; u16 ident_len; @@ -1118,10 +1121,15 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) /* Fill bits in reply message */ if (dev->flags & IFF_UP) status |= ICMP_EXT_ECHOREPLY_ACTIVE; - if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + + in_dev = __in_dev_get_rcu(dev); + if (in_dev && rcu_access_pointer(in_dev->ifa_list)) 
status |= ICMP_EXT_ECHOREPLY_IPV4; - if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + + in6_dev = __in6_dev_get(dev); + if (in6_dev && !list_empty(&in6_dev->addr_list)) status |= ICMP_EXT_ECHOREPLY_IPV6; + dev_put(dev); icmphdr->un.echo.sequence |= htons(status); return true; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index da43957a58..27975a44d1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -589,6 +589,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) } EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +void inet_csk_clear_xmit_timers_sync(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* ongoing timer handlers need to acquire socket lock. */ + sock_not_owned_by_me(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; + + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); + sk_stop_timer_sync(sk, &sk->sk_timer); +} + void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index b4e0120af9..a2ab164e81 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -28,9 +28,9 @@ #include #include -static u32 inet_ehashfn(const struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +u32 inet_ehashfn(const struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) { static u32 inet_ehash_secret __read_mostly; @@ -39,6 +39,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr, return __inet_ehashfn(laddr, lport, faddr, fport, inet_ehash_secret + net_hash_mix(net)); } +EXPORT_SYMBOL_GPL(inet_ehashfn); /* This function handles inet_sock, but also timewait and request sockets * for IPv4/IPv6. 
@@ -216,20 +217,25 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, - struct sk_buff *skb, int doff, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum) +INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn); + +struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, + inet_ehashfn_t *ehashfn) { struct sock *reuse_sk = NULL; u32 phash; if (sk->sk_reuseport) { - phash = inet_ehashfn(net, daddr, hnum, saddr, sport); + phash = INDIRECT_CALL_2(ehashfn, udp_ehashfn, inet_ehashfn, + net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, phash, skb, doff); } return reuse_sk; } +EXPORT_SYMBOL_GPL(inet_lookup_reuseport); /* * Here are some nice properties to exploit here. The BSD API @@ -253,8 +259,8 @@ static struct sock *inet_lhash2_lookup(struct net *net, sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { - result = lookup_reuseport(net, sk, skb, doff, - saddr, sport, daddr, hnum); + result = inet_lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum, inet_ehashfn); if (result) return result; @@ -283,7 +289,8 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; - reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); + reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, + inet_ehashfn); if (reuse_sk) sk = reuse_sk; return sk; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 437afe392e..25809e06f1 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -254,12 +254,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } 
EXPORT_SYMBOL_GPL(__inet_twsk_schedule); +/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) { - struct inet_timewait_sock *tw; - struct sock *sk; struct hlist_nulls_node *node; unsigned int slot; + struct sock *sk; for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; @@ -268,25 +268,35 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) + int state = inet_sk_state_load(sk); + + if ((1 << state) & ~(TCPF_TIME_WAIT | + TCPF_NEW_SYN_RECV)) continue; - tw = inet_twsk(sk); - if ((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count)) + + if (sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count)) continue; - if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) continue; - if (unlikely((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->ns.count))) { - inet_twsk_put(tw); + if (unlikely(sk->sk_family != family || + refcount_read(&sock_net(sk)->ns.count))) { + sock_gen_put(sk); goto restart; } rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule_put(tw); + if (state == TCP_TIME_WAIT) { + inet_twsk_deschedule_put(inet_twsk(sk)); + } else { + struct request_sock *req = inet_reqsk(sk); + + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + } local_bh_enable(); goto restart_rcu; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5d17e5f5d0..cd967493bf 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, tpi->flags | TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); } else { + if (unlikely(!pskb_may_pull(skb, + gre_hdr_len + sizeof(*ershdr)))) + return PACKET_REJECT; + ershdr = (struct erspan_base_hdr 
*)(skb->data + gre_hdr_len); ver = ershdr->ver; + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 71bf3aeed7..a9d5a19732 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -955,6 +955,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -963,6 +965,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1253,6 +1257,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1261,6 +1267,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a748a1e754..aee7cd584c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1109,6 +1109,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1117,6 +1119,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + 
return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1493,6 +1497,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1501,6 +1507,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index 61cb2341f5..7c1a0cd9f4 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) laddr = 0; indev = __in_dev_get_rcu(skb->dev); + if (!indev) + return daddr; in_dev_for_each_ifa_rcu(ifa, indev) { if (ifa->ifa_flags & IFA_F_SECONDARY) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 12c59d7009..e7130a9f0e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -139,7 +139,8 @@ struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst); -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, @@ -844,22 +845,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf __ip_do_redirect(rt, skb, &fl4, true); } -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; 
- struct dst_entry *ret = dst; - if (rt) { - if (dst->obsolete > 0) { - ip_rt_put(rt); - ret = NULL; - } else if ((rt->rt_flags & RTCF_REDIRECTED) || - rt->dst.expires) { - ip_rt_put(rt); - ret = NULL; - } - } - return ret; + if ((dst->obsolete > 0) || + (rt->rt_flags & RTCF_REDIRECTED) || + rt->dst.expires) + sk_dst_reset(sk); } /* @@ -933,13 +927,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (log_martians && + if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); -#endif } out_put_peer: inet_putpeer(peer); @@ -2175,6 +2167,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, int err = -EINVAL; u32 tag = 0; + if (!in_dev) + return -EINVAL; + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 521c15962c..9c7998377d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2692,7 +2692,7 @@ void tcp_shutdown(struct sock *sk, int how) /* If we've already sent a FIN, or it's a closed state, skip this. */ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_send_fin(sk); @@ -2803,7 +2803,7 @@ void __tcp_close(struct sock *sk, long timeout) * machine. State transitions: * * TCP_ESTABLISHED -> TCP_FIN_WAIT1 - * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) + * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult) * TCP_CLOSE_WAIT -> TCP_LAST_ACK * * are legal only when FIN has been sent (i.e. 
in window), @@ -2916,6 +2916,8 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index e3a9477293..5fdef5ddfb 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -174,6 +174,24 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, return ret; } +static bool is_next_msg_fin(struct sk_psock *psock) +{ + struct scatterlist *sge; + struct sk_msg *msg_rx; + int i; + + msg_rx = sk_psock_peek_msg(psock); + i = msg_rx->sg.start; + sge = sk_msg_elem(msg_rx, i); + if (!sge->length) { + struct sk_buff *skb = msg_rx->skb; + + if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + return true; + } + return false; +} + static int tcp_bpf_recvmsg_parser(struct sock *sk, struct msghdr *msg, size_t len, @@ -195,8 +213,41 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); + + /* We may have received data on the sk_receive_queue pre-accept and + * then we can not use read_skb in this context because we haven't + * assigned a sk_socket yet so have no link to the ops. The work-around + * is to check the sk_receive_queue and in these cases read skbs off + * queue again. The read_skb hook is not running at this point because + * of lock_sock so we avoid having multiple runners in read_skb. + */ + if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) { + tcp_data_ready(sk); + /* This handles the ENOMEM errors if we both receive data + * pre accept and are already under memory pressure. At least + * let user know to retry. 
+ */ + if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) { + copied = -EAGAIN; + goto out; + } + } + msg_bytes_ready: copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + /* The typical case for EFAULT is the socket was gracefully + * shutdown with a FIN pkt. So check here the other case is + * some error on copy_page_to_iter which would be unexpected. + * On fin return correct return code to zero. + */ + if (copied == -EFAULT) { + bool is_fin = is_next_msg_fin(psock); + + if (is_fin) { + copied = 0; + goto out; + } + } if (!copied) { long timeo; int data; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 43bcefbaef..d6db7c2564 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -55,7 +55,18 @@ struct dctcp { }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ -module_param(dctcp_shift_g, uint, 0644); + +static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 0, 10); +} + +static const struct kernel_param_ops dctcp_shift_g_ops = { + .set = dctcp_shift_g_set, + .get = param_get_uint, +}; + +module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644); MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha"); static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e51b5d887c..52a9d7f96d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6543,6 +6543,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); + if (sk->sk_shutdown & SEND_SHUTDOWN) + tcp_shutdown(sk, SEND_SHUTDOWN); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0666be6b9e..e9b1dcf2d4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -153,6 +153,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (tcptw->tw_ts_recent_stamp && (!twp || 
(reuse && time_after32(ktime_get_seconds(), tcptw->tw_ts_recent_stamp)))) { + /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + * and releasing the bucket lock. + */ + if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) + return 0; + /* In case of repair and re-using TIME-WAIT sockets we still * want to be sure that it is safe as above but honor the * sequence numbers and time stamps set as part of the repair @@ -173,7 +179,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) tp->rx_opt.ts_recent = tcptw->tw_ts_recent; tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; } - sock_hold(sktw); + return 1; } @@ -1811,7 +1817,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { - u32 limit, tail_gso_size, tail_gso_segs; + u32 tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; @@ -1820,6 +1826,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) bool fragstolen; u32 gso_segs; u32 gso_size; + u64 limit; int delta; /* In case all data was pulled from skb frags (in __pskb_pull_tail()), @@ -1916,7 +1923,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) __skb_push(skb, hdrlen); no_coalesce: - limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1); + /* sk->sk_backlog.len is reset only at the end of __release_sock(). + * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach + * sk_rcvbuf in normal conditions. + */ + limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1; + + limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1; /* Only socket owner can try to collapse/prune rx queues * to reduce memory overhead, so add a little headroom here. 
@@ -1924,6 +1937,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) */ limit += 64 * 1024; + limit = min_t(u64, limit, UINT_MAX); + if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d8817d6c7b..0fb84e57a2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3441,7 +3441,9 @@ void tcp_send_fin(struct sock *sk) return; } } else { - skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | + __GFP_NOWARN)); if (unlikely(!skb)) return; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2caf54c241..53d7a81d62 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -398,9 +398,9 @@ static int compute_score(struct sock *sk, struct net *net, return score; } -static u32 udp_ehashfn(const struct net *net, const __be32 laddr, - const __u16 lport, const __be32 faddr, - const __be16 fport) +INDIRECT_CALLABLE_SCOPE +u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, + const __be32 faddr, const __be16 fport) { static u32 udp_ehash_secret __read_mostly; @@ -410,22 +410,6 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } -static struct sock *lookup_reuseport(struct net *net, struct sock *sk, - struct sk_buff *skb, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum) -{ - struct sock *reuse_sk = NULL; - u32 hash; - - if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { - hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - reuse_sk = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - } - return reuse_sk; -} - /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, @@ -436,15 +420,28 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; int 
score, badness; + bool need_rescore; result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif); + need_rescore = false; +rescore: + score = compute_score(need_rescore ? result : sk, net, saddr, + sport, daddr, hnum, dif, sdif); if (score > badness) { badness = score; - result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + + if (need_rescore) + continue; + + if (sk->sk_state == TCP_ESTABLISHED) { + result = sk; + continue; + } + + result = inet_lookup_reuseport(net, sk, skb, sizeof(struct udphdr), + saddr, sport, daddr, hnum, udp_ehashfn); if (!result) { result = sk; continue; @@ -458,9 +455,14 @@ static struct sock *udp4_lib_lookup2(struct net *net, if (IS_ERR(result)) continue; - badness = compute_score(result, net, saddr, sport, - daddr, hnum, dif, sdif); - + /* compute_score is too long of a function to be + * inlined, and calling it again here yields + * measureable overhead for some + * workloads. Work around it by jumping + * backwards to rescore 'result'. 
+ */ + need_rescore = true; + goto rescore; } } return result; @@ -483,7 +485,8 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; - reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + reuse_sk = inet_lookup_reuseport(net, sk, skb, sizeof(struct udphdr), + saddr, sport, daddr, hnum, udp_ehashfn); if (reuse_sk) sk = reuse_sk; return sk; @@ -601,6 +604,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +EXPORT_SYMBOL(udp_encap_needed_key); + +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +EXPORT_SYMBOL(udpv6_encap_needed_key); +#endif + void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); @@ -1132,16 +1142,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = udp_cmsg_send(sk, msg, &ipc.gso_size); - if (err > 0) + if (err > 0) { err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); + connected = 0; + } if (unlikely(err < 0)) { kfree(ipc.opt); return err; } if (ipc.opt) free = 1; - connected = 0; } if (!ipc.opt) { struct ip_options_rcu *inet_opt; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 86d32a1e62..c612688499 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -434,6 +434,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *p; unsigned int ulen; int ret = 0; + int flush; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -467,13 +468,22 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return p; } + flush = NAPI_GRO_CB(p)->flush; + + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + /* Terminate the flow on len mismatch or if it grow "too much". 
* Under small packet flood GRO count could elsewhere grow a lot * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (ulen > ntohs(uh2->len)) { + if (ulen > ntohs(uh2->len) || flush) { pp = p; } else { if (NAPI_GRO_CB(skb)->is_flist) { @@ -515,11 +525,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - /* we can do L4 aggregation only if the packet can't land in a tunnel - * otherwise we could corrupt the inner stream + /* We can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream. Detecting such packets + * cannot be foolproof and the aggregation might still happen in some + * cases. Such packets should be caught in udp_unexpected_gso later. */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { + /* If the packet was locally encapsulated in a UDP tunnel that + * wasn't detected above, do not GRO. + */ + if (skb->encapsulation) + goto out; + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? 
!udp_sk(sk)->gro_enabled : 1; @@ -683,13 +701,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index eac206a290..1f50517289 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -61,7 +61,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) ip_send_check(iph); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 968ca07819..a17e1d744b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2054,9 +2054,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { - result = ifp; - in6_ifa_hold(ifp); - break; + if (in6_ifa_hold_safe(ifp)) { + result = ifp; + break; + } } } } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8e9e80eb0f..a4caaead74 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -232,8 +232,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, rt = pol_lookup_func(lookup, net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); + + if (!idev) + goto 
again; err = fib6_rule_saddr(net, rule, flags, flp6, - ip6_dst_idev(&rt->dst)->dev); + idev->dev); if (err == -EAGAIN) goto again; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index c40cbdfc62..869173f176 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -41,6 +41,7 @@ u32 inet6_ehashfn(const struct net *net, return __inet6_ehashfn(lhash, lport, fhash, fport, inet6_ehash_secret + net_hash_mix(net)); } +EXPORT_SYMBOL_GPL(inet6_ehashfn); /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so @@ -113,22 +114,27 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, - struct sk_buff *skb, int doff, - const struct in6_addr *saddr, - __be16 sport, - const struct in6_addr *daddr, - unsigned short hnum) +INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn); + +struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, + inet6_ehashfn_t *ehashfn) { struct sock *reuse_sk = NULL; u32 phash; if (sk->sk_reuseport) { - phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); + phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn, + net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, phash, skb, doff); } return reuse_sk; } +EXPORT_SYMBOL_GPL(inet6_lookup_reuseport); /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(struct net *net, @@ -145,8 +151,8 @@ static struct sock *inet6_lhash2_lookup(struct net *net, sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { - result = lookup_reuseport(net, sk, skb, doff, - saddr, sport, daddr, hnum); + result = inet6_lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum, 
inet6_ehashfn); if (result) return result; @@ -177,7 +183,8 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net, if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; - reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); + reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum, inet6_ehashfn); if (reuse_sk) sk = reuse_sk; return sk; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7e0a30380b..c0ff5ee490 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -645,19 +645,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (!w) { /* New dump: * - * 1. hook callback destructor. - */ - cb->args[3] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. + * 1. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; + + /* 2. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + } arg.skb = skb; @@ -1375,7 +1375,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; - struct fib6_node *fn, *pn = NULL; + struct fib6_node *fn; +#ifdef CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; +#endif int err = -ENOMEM; int allow_create = 1; int replace_required = 0; @@ -1399,9 +1402,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, goto out; } +#ifdef CONFIG_IPV6_SUBTREES pn = fn; -#ifdef CONFIG_IPV6_SUBTREES if (rt->fib6_src.plen) { struct fib6_node *sn; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0efd5b4346..501630e3f1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -533,6 +533,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, struct ip6_tnl *tunnel; u8 ver; + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + ipv6h = 
ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 277a5ee887..afd22ea9f5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1127,6 +1127,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1135,6 +1137,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1503,6 +1507,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1511,6 +1517,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 28e44782c9..6993675171 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -363,7 +363,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) * the source of the fragment, with the Pointer field set to zero. 
*/ nexthdr = hdr->nexthdr; - if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { + if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3a95466e10..3bc3a30363 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -87,7 +87,8 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst); -static struct dst_entry *ip6_negative_advice(struct dst_entry *); +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); @@ -2763,24 +2764,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, } EXPORT_INDIRECT_CALLABLE(ip6_dst_check); -static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; - if (rt) { - if (rt->rt6i_flags & RTF_CACHE) { - rcu_read_lock(); - if (rt6_check_expired(rt)) { - rt6_remove_exception_rt(rt); - dst = NULL; - } - rcu_read_unlock(); - } else { - dst_release(dst); - dst = NULL; + if (rt->rt6i_flags & RTF_CACHE) { + rcu_read_lock(); + if (rt6_check_expired(rt)) { + /* counteract the dst_release() in sk_dst_reset() */ + dst_hold(dst); + sk_dst_reset(sk); + + rt6_remove_exception_rt(rt); } + rcu_read_unlock(); + return; } - return dst; + sk_dst_reset(sk); } static void ip6_link_failure(struct sk_buff *skb) @@ -4456,7 +4457,7 @@ static void rtmsg_to_fib6_config(struct net *net, .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? 
: RT6_TABLE_MAIN, .fc_ifindex = rtmsg->rtmsg_ifindex, - .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER, + .fc_metric = rtmsg->rtmsg_metric, .fc_expires = rtmsg->rtmsg_info, .fc_dst_len = rtmsg->rtmsg_dst_len, .fc_src_len = rtmsg->rtmsg_src_len, @@ -4486,6 +4487,9 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) rtnl_lock(); switch (cmd) { case SIOCADDRT: + /* Only do the default setting of fc_metric in route adding */ + if (cfg.fc_metric == 0) + cfg.fc_metric = IP6_RT_PRIO_USER; err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index dc434e4ee6..03090d1419 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -546,6 +546,8 @@ int __init seg6_init(void) #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL out_unregister_genl: +#endif +#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC) genl_unregister_family(&seg6_genl_family); #endif out_unregister_pernet: @@ -559,8 +561,9 @@ void seg6_exit(void) seg6_hmac_exit(); #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_local_exit(); seg6_iptunnel_exit(); #endif - unregister_pernet_subsys(&ip6_segments_ops); genl_unregister_family(&seg6_genl_family); + unregister_pernet_subsys(&ip6_segments_ops); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index b7d6b64cc5..fdbc06f356 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -354,6 +354,7 @@ static int seg6_hmac_init_algo(void) struct crypto_shash *tfm; struct shash_desc *shash; int i, alg_count, cpu; + int ret = -ENOMEM; alg_count = ARRAY_SIZE(hmac_algos); @@ -364,12 +365,14 @@ static int seg6_hmac_init_algo(void) algo = &hmac_algos[i]; algo->tfms = alloc_percpu(struct crypto_shash *); if (!algo->tfms) - return -ENOMEM; + goto error_out; for_each_possible_cpu(cpu) { tfm = crypto_alloc_shash(algo->name, 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + goto error_out; + } p_tfm = 
per_cpu_ptr(algo->tfms, cpu); *p_tfm = tfm; } @@ -381,18 +384,22 @@ static int seg6_hmac_init_algo(void) algo->shashs = alloc_percpu(struct shash_desc *); if (!algo->shashs) - return -ENOMEM; + goto error_out; for_each_possible_cpu(cpu) { shash = kzalloc_node(shsize, GFP_KERNEL, cpu_to_node(cpu)); if (!shash) - return -ENOMEM; + goto error_out; *per_cpu_ptr(algo->shashs, cpu) = shash; } } return 0; + +error_out: + seg6_hmac_exit(); + return ret; } int __init seg6_hmac_init(void) @@ -412,22 +419,29 @@ int __net_init seg6_hmac_net_init(struct net *net) void seg6_hmac_exit(void) { struct seg6_hmac_algo *algo = NULL; + struct crypto_shash *tfm; + struct shash_desc *shash; int i, alg_count, cpu; alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { algo = &hmac_algos[i]; - for_each_possible_cpu(cpu) { - struct crypto_shash *tfm; - struct shash_desc *shash; - shash = *per_cpu_ptr(algo->shashs, cpu); - kfree(shash); - tfm = *per_cpu_ptr(algo->tfms, cpu); - crypto_free_shash(tfm); + if (algo->shashs) { + for_each_possible_cpu(cpu) { + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + } + free_percpu(algo->shashs); + } + + if (algo->tfms) { + for_each_possible_cpu(cpu) { + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); } - free_percpu(algo->tfms); - free_percpu(algo->shashs); } } EXPORT_SYMBOL(seg6_hmac_exit); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index e756ba705f..f98bb71919 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -327,10 +327,8 @@ static int seg6_input_core(struct net *net, struct sock *sk, int err; err = seg6_do_srh(skb); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } + if (unlikely(err)) + goto drop; slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); @@ -355,7 +353,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) - return err; + goto drop; if 
(static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, @@ -363,6 +361,9 @@ static int seg6_input_core(struct net *net, struct sock *sk, skb_dst(skb)->dev, seg6_input_finish); return seg6_input_finish(dev_net(skb->dev), NULL, skb); +drop: + kfree_skb(skb); + return err; } static int seg6_input_nf(struct sk_buff *skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d5d254ca2d..c60162ea0a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -68,11 +68,12 @@ int udpv6_init_sock(struct sock *sk) return 0; } -static u32 udp6_ehashfn(const struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +INDIRECT_CALLABLE_SCOPE +u32 udp6_ehashfn(const struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) { static u32 udp6_ehash_secret __read_mostly; static u32 udp_ipv6_hash_secret __read_mostly; @@ -156,24 +157,6 @@ static int compute_score(struct sock *sk, struct net *net, return score; } -static struct sock *lookup_reuseport(struct net *net, struct sock *sk, - struct sk_buff *skb, - const struct in6_addr *saddr, - __be16 sport, - const struct in6_addr *daddr, - unsigned int hnum) -{ - struct sock *reuse_sk = NULL; - u32 hash; - - if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { - hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - reuse_sk = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - } - return reuse_sk; -} - /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -183,15 +166,28 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; + bool need_rescore; result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif); + need_rescore = false; 
+rescore: + score = compute_score(need_rescore ? result : sk, net, saddr, + sport, daddr, hnum, dif, sdif); if (score > badness) { badness = score; - result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + + if (need_rescore) + continue; + + if (sk->sk_state == TCP_ESTABLISHED) { + result = sk; + continue; + } + + result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr), + saddr, sport, daddr, hnum, udp6_ehashfn); if (!result) { result = sk; continue; @@ -205,8 +201,14 @@ static struct sock *udp6_lib_lookup2(struct net *net, if (IS_ERR(result)) continue; - badness = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif); + /* compute_score is too long of a function to be + * inlined, and calling it again here yields + * measureable overhead for some + * workloads. Work around it by jumping + * backwards to rescore 'result'. + */ + need_rescore = true; + goto rescore; } } return result; @@ -231,7 +233,8 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; - reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + reuse_sk = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr), + saddr, sport, daddr, hnum, udp6_ehashfn); if (reuse_sk) sk = reuse_sk; return sk; @@ -473,7 +476,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto try_again; } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); @@ -1479,9 +1482,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.opt = opt; err = udp_cmsg_send(sk, msg, &ipc6.gso_size); - if (err > 0) + if (err > 0) { err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6); + connected = false; + } if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1493,7 +1498,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } 
if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; - connected = false; } if (!opt) { opt = txopt_get(np); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b3d9ed96e5..28f63c01a5 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -170,13 +170,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4907ab241d..7dbefbb338 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -56,7 +56,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. 
+ */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 6cd97c7544..9a36e17498 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -136,6 +136,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, /* checksums verified by L2TP */ skb->ip_summed = CHECKSUM_NONE; + /* drop outer flow-hash */ + skb_clear_hash(skb); + skb_dst_drop(skb); nf_reset_ct(skb); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 02bd90a537..f277ce839d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1845,15 +1845,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - sta->sdata->u.vlan.sta) { - ieee80211_clear_fast_rx(sta); + sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); - } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; + ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 21549a440b..03f8c8bdab 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -113,7 +113,7 @@ struct ieee80211_bss { }; /** - * enum ieee80211_corrupt_data_flags - BSS data corruption flags + * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags * @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted * @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted * @@ -126,7 +126,7 @@ enum ieee80211_bss_corrupt_data_flags { }; /** - * enum ieee80211_valid_data_flags - BSS valid data flags + * enum ieee80211_bss_valid_data_flags - BSS valid data flags * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates 
were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 55550ead2c..a4cc9d077c 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -265,19 +265,27 @@ int mac802154_llsec_key_add(struct mac802154_llsec *sec, return -ENOMEM; } +static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu) +{ + struct ieee802154_llsec_key_entry *pos; + struct mac802154_llsec_key *mkey; + + pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu); + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + llsec_key_put(mkey); + kfree_sensitive(pos); +} + int mac802154_llsec_key_del(struct mac802154_llsec *sec, const struct ieee802154_llsec_key_id *key) { struct ieee802154_llsec_key_entry *pos; list_for_each_entry(pos, &sec->table.keys, list) { - struct mac802154_llsec_key *mkey; - - mkey = container_of(pos->key, struct mac802154_llsec_key, key); - if (llsec_key_id_equal(&pos->id, key)) { list_del_rcu(&pos->list); - llsec_key_put(mkey); + call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu); return 0; } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5db1c0142a..3c3f630f49 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3018,9 +3018,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, sock_hold(new_mptcp_sock); newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - } else { - MPTCP_INC_STATS(sock_net(sk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } out: @@ -3398,6 +3395,9 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE); diff 
--git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 78aa6125ea..b4ccae4f68 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -250,6 +250,9 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; spinlock_t join_list_lock; + int keepalive_cnt; + int keepalive_idle; + int keepalive_intvl; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 4bb305342f..36d85af12e 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -177,8 +177,6 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, switch (optname) { case SO_KEEPALIVE: - mptcp_sol_socket_sync_intval(msk, optname, val); - return 0; case SO_DEBUG: case SO_MARK: case SO_PRIORITY: @@ -595,6 +593,60 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t return ret; } +static int __tcp_sock_set_keepintvl(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPINTVL) + return -EINVAL; + + WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ); + + return 0; +} + +static int __tcp_sock_set_keepcnt(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPCNT) + return -EINVAL; + + /* Paired with READ_ONCE() in keepalive_probes() */ + WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val); + + return 0; +} + +static int mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, + int (*set_val)(struct sock *, int), + int *msk_val, sockptr_t optval, + unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + int val, err; + + err = mptcp_get_int_option(msk, optval, optlen, &val); + if (err) + return err; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int ret; + + lock_sock(ssk); + ret = set_val(ssk, val); + err = err ? 
: ret; + release_sock(ssk); + } + + if (!err) { + *msk_val = val; + sockopt_seq_inc(msk); + } + release_sock(sk); + + return err; +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -603,6 +655,21 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; case TCP_CONGESTION: return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); + case TCP_KEEPIDLE: + return mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, + &tcp_sock_set_keepidle_locked, + &msk->keepalive_idle, + optval, optlen); + case TCP_KEEPINTVL: + return mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, + &__tcp_sock_set_keepintvl, + &msk->keepalive_intvl, + optval, optlen); + case TCP_KEEPCNT: + return mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, + &__tcp_sock_set_keepcnt, + &msk->keepalive_cnt, + optval, optlen); } return -EOPNOTSUPP; @@ -669,9 +736,40 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int return ret; } +static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, + int __user *optlen, int val) +{ + int len; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < 0) + return -EINVAL; + + if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { + unsigned char ucval = (unsigned char)val; + + len = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &ucval, 1)) + return -EFAULT; + } else { + len = min_t(unsigned int, len, sizeof(int)); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + } + + return 0; +} + static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { + struct sock *sk = (void *)msk; + switch (optname) { case TCP_ULP: case TCP_CONGESTION: @@ -679,6 +777,18 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_CC_INFO: return 
mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); + case TCP_KEEPIDLE: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_idle ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); + case TCP_KEEPINTVL: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_intvl ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); + case TCP_KEEPCNT: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_cnt ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); } return -EOPNOTSUPP; } @@ -748,6 +858,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) tcp_set_congestion_control(ssk, msk->ca_name, false, true); + tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); + __tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); + __tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); } static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8023078755..ff7239fe3d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -725,6 +725,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto dispose_child; } + if (fallback) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); + subflow_drop_ctx(child); goto out; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4f645d51c2..f02ebe4609 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -728,6 +728,15 @@ config NF_FLOW_TABLE To compile it as a module, choose M here. +config NF_FLOW_TABLE_PROCFS + bool "Supply flow table statistics in procfs" + default y + depends on PROC_FS + depends on SYSCTL + help + This option enables for the flow table offload statistics + to be shown in procfs under net/netfilter/nf_flowtable. 
+ config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index aab20e575e..3f77f20ae3 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -124,6 +124,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ nf_flow_table_offload.o +nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a0921adc31..1e689c7141 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; } diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index e78cdd73ef..beb0e84b5f 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -606,14 +606,74 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) } EXPORT_SYMBOL_GPL(nf_flow_table_free); +static int nf_flow_table_init_net(struct net *net) 
+{ + net->ft.stat = alloc_percpu(struct nf_flow_table_stat); + return net->ft.stat ? 0 : -ENOMEM; +} + +static void nf_flow_table_fini_net(struct net *net) +{ + free_percpu(net->ft.stat); +} + +static int nf_flow_table_pernet_init(struct net *net) +{ + int ret; + + ret = nf_flow_table_init_net(net); + if (ret < 0) + return ret; + + ret = nf_flow_table_init_proc(net); + if (ret < 0) + goto out_proc; + + return 0; + +out_proc: + nf_flow_table_fini_net(net); + return ret; +} + +static void nf_flow_table_pernet_exit(struct list_head *net_exit_list) +{ + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) { + nf_flow_table_fini_proc(net); + nf_flow_table_fini_net(net); + } +} + +static struct pernet_operations nf_flow_table_net_ops = { + .init = nf_flow_table_pernet_init, + .exit_batch = nf_flow_table_pernet_exit, +}; + static int __init nf_flow_table_module_init(void) { - return nf_flow_table_offload_init(); + int ret; + + ret = register_pernet_subsys(&nf_flow_table_net_ops); + if (ret < 0) + return ret; + + ret = nf_flow_table_offload_init(); + if (ret) + goto out_offload; + + return 0; + +out_offload: + unregister_pernet_subsys(&nf_flow_table_net_ops); + return ret; } static void __exit nf_flow_table_module_exit(void) { nf_flow_table_offload_exit(); + unregister_pernet_subsys(&nf_flow_table_net_ops); } module_init(nf_flow_table_module_init); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 280fdd3296..6783ea220f 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, proto = veth->h_vlan_encapsulated_proto; break; case htons(ETH_P_PPP_SES): - proto = nf_flow_pppoe_proto(skb); + if (!nf_flow_pppoe_proto(skb, &proto)) + return NF_ACCEPT; break; default: proto = skb->protocol; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 28026467b5..f3227f9316 100644 --- 
a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -156,7 +156,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, tuple->encap[i].proto = skb->protocol; break; case htons(ETH_P_PPP_SES): - phdr = (struct pppoe_hdr *)skb_mac_header(skb); + phdr = (struct pppoe_hdr *)skb_network_header(skb); tuple->encap[i].id = ntohs(phdr->sid); tuple->encap[i].proto = skb->protocol; break; @@ -246,10 +246,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, +static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, u32 *offset) { struct vlan_ethhdr *veth; + __be16 inner_proto; switch (skb->protocol) { case htons(ETH_P_8021Q): @@ -260,7 +261,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, } break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_proto(skb) == proto) { + if (nf_flow_pppoe_proto(skb, &inner_proto) && + inner_proto == proto) { *offset += PPPOE_SES_HLEN; return true; } @@ -289,7 +291,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb, skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): - skb->protocol = nf_flow_pppoe_proto(skb); + skb->protocol = __nf_flow_pppoe_proto(skb); skb_pull(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 336f282a22..6ac1ebe174 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -953,17 +953,22 @@ static void flow_offload_work_stats(struct flow_offload_work *offload) static void flow_offload_work_handler(struct work_struct *work) { struct flow_offload_work *offload; + struct net *net; offload = container_of(work, struct flow_offload_work, work); + net = read_pnet(&offload->flowtable->net); switch (offload->cmd) { case FLOW_CLS_REPLACE: flow_offload_work_add(offload); + 
NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add); break; case FLOW_CLS_DESTROY: flow_offload_work_del(offload); + NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del); break; case FLOW_CLS_STATS: flow_offload_work_stats(offload); + NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats); break; default: WARN_ON_ONCE(1); @@ -975,12 +980,18 @@ static void flow_offload_work_handler(struct work_struct *work) static void flow_offload_queue_work(struct flow_offload_work *offload) { - if (offload->cmd == FLOW_CLS_REPLACE) + struct net *net = read_pnet(&offload->flowtable->net); + + if (offload->cmd == FLOW_CLS_REPLACE) { + NF_FLOW_TABLE_STAT_INC(net, count_wq_add); queue_work(nf_flow_offload_add_wq, &offload->work); - else if (offload->cmd == FLOW_CLS_DESTROY) + } else if (offload->cmd == FLOW_CLS_DESTROY) { + NF_FLOW_TABLE_STAT_INC(net, count_wq_del); queue_work(nf_flow_offload_del_wq, &offload->work); - else + } else { + NF_FLOW_TABLE_STAT_INC(net, count_wq_stats); queue_work(nf_flow_offload_stats_wq, &offload->work); + } } static struct flow_offload_work * diff --git a/net/netfilter/nf_flow_table_procfs.c b/net/netfilter/nf_flow_table_procfs.c new file mode 100644 index 0000000000..159b033a43 --- /dev/null +++ b/net/netfilter/nf_flow_table_procfs.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +static void *nf_flow_table_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return per_cpu_ptr(net->ft.stat, cpu); + } + + return NULL; +} + +static void *nf_flow_table_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + int cpu; + + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return per_cpu_ptr(net->ft.stat, cpu); + } + (*pos)++; + 
return NULL; +} + +static void nf_flow_table_cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int nf_flow_table_cpu_seq_show(struct seq_file *seq, void *v) +{ + const struct nf_flow_table_stat *st = v; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, "wq_add wq_del wq_stats\n"); + return 0; + } + + seq_printf(seq, "%8d %8d %8d\n", + st->count_wq_add, + st->count_wq_del, + st->count_wq_stats + ); + return 0; +} + +static const struct seq_operations nf_flow_table_cpu_seq_ops = { + .start = nf_flow_table_cpu_seq_start, + .next = nf_flow_table_cpu_seq_next, + .stop = nf_flow_table_cpu_seq_stop, + .show = nf_flow_table_cpu_seq_show, +}; + +int nf_flow_table_init_proc(struct net *net) +{ + struct proc_dir_entry *pde; + + pde = proc_create_net("nf_flowtable", 0444, net->proc_net_stat, + &nf_flow_table_cpu_seq_ops, + sizeof(struct seq_net_private)); + return pde ? 0 : -ENOMEM; +} + +void nf_flow_table_fini_proc(struct net *net) +{ + remove_proc_entry("nf_flowtable", net->proc_net_stat); +} diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 42bf83a20c..3999b89793 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1139,6 +1139,24 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ __NFT_TABLE_F_WAS_AWAKEN) +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->ctx.table == ctx->table && + trans->msg_type == NFT_MSG_DELCHAIN && + nft_is_base_chain(trans->ctx.chain)) + return true; + } + + return false; +} + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -1162,7 +1180,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return -EOPNOTSUPP; /* No 
dormant off/on/off/on games in single transaction */ - if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, @@ -2303,6 +2321,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; + if (table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; @@ -2800,7 +2821,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, { const struct nft_expr_type *type, *candidate = NULL; - list_for_each_entry(type, &nf_tables_expressions, list) { + list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; @@ -2832,9 +2853,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); + rcu_read_lock(); type = __nft_expr_type_get(family, nla); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -4641,6 +4666,12 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; + if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == + (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; @@ -5062,6 +5093,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, 
GFP_KERNEL); @@ -7013,7 +7045,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; - list_for_each_entry(type, &nf_tables_objects, list) { + list_for_each_entry_rcu(type, &nf_tables_objects, list) { if (type->family != NFPROTO_UNSPEC && type->family != family) continue; @@ -7029,9 +7061,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; + rcu_read_lock(); type = __nft_obj_type_get(objtype, family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -7683,11 +7719,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, return err; } +/* call under rcu_read_lock */ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) { const struct nf_flowtable_type *type; - list_for_each_entry(type, &nf_tables_flowtables, list) { + list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { if (family == type->family) return type; } @@ -7699,9 +7736,13 @@ nft_flowtable_type_get(struct net *net, u8 family) { const struct nf_flowtable_type *type; + rcu_read_lock(); type = __nft_flowtable_type_get(family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -9690,10 +9731,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -9864,12 +9906,7 @@ static int __nf_tables_abort(struct net *net, 
enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - - return 0; + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, @@ -9883,6 +9920,16 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. + */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&nft_net->commit_mutex); return ret; @@ -10680,9 +10727,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); @@ -10774,6 +10822,7 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); + nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8c96e01f6a..89b16d36da 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -167,7 +167,9 @@ instance_destroy_rcu(struct rcu_head *head) struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, rcu); + rcu_read_lock(); nfqnl_flush(inst, NULL, 0); + rcu_read_unlock(); kfree(inst); module_put(THIS_MODULE); } diff --git 
a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 35aa4ea942..8c76ca4d86 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -336,7 +336,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; if (n > 1) { - nf_unregister_net_hook(ctx->net, &found->ops); + if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) + nf_unregister_net_hook(ctx->net, &found->ops); + list_del_rcu(&found->list); kfree_rcu(found, rcu); return; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 02327ffebc..55237d8a3d 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) int mac_off = skb_mac_header(skb) - skb->data; u8 *vlanh, *dst_u8 = (u8 *) d; struct vlan_ethhdr veth; - u8 vlan_hlen = 0; - - if ((skb->protocol == htons(ETH_P_8021AD) || - skb->protocol == htons(ETH_P_8021Q)) && - offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN) - vlan_hlen += VLAN_HLEN; vlanh = (u8 *) &veth; - if (offset < VLAN_ETH_HLEN + vlan_hlen) { + if (offset < VLAN_ETH_HLEN) { u8 ethlen = len; - if (vlan_hlen && - skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0) - return false; - else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) + if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) return false; - if (offset + len > VLAN_ETH_HLEN + vlan_hlen) - ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; + if (offset + len > VLAN_ETH_HLEN) + ethlen -= offset + len - VLAN_ETH_HLEN; - memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); + memcpy(dst_u8, vlanh + offset, ethlen); len -= ethlen; if (len == 0) return true; dst_u8 += ethlen; - offset = ETH_HLEN + vlan_hlen; + offset = ETH_HLEN; } else { - offset -= VLAN_HLEN + vlan_hlen; + offset -= VLAN_HLEN; } return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; @@ -119,6 +110,17 @@ static int 
nft_payload_inner_offset(const struct nft_pktinfo *pkt) return pkt->inneroff; } +static bool nft_payload_need_vlan_adjust(u32 offset, u32 len) +{ + unsigned int boundary = offset + len; + + /* data past ether src/dst requested, copy needed */ + if (boundary > offsetof(struct ethhdr, h_proto)) + return true; + + return false; +} + void nft_payload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -136,7 +138,8 @@ void nft_payload_eval(const struct nft_expr *expr, if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0) goto err; - if (skb_vlan_tag_present(skb)) { + if (skb_vlan_tag_present(skb) && + nft_payload_need_vlan_adjust(priv->offset, priv->len)) { if (!nft_payload_copy_vlan(dest, skb, priv->offset, priv->len)) goto err; @@ -638,21 +641,89 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src, return 0; } +struct nft_payload_set { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + u8 sreg; + u8 csum_type; + u8 csum_offset; + u8 csum_flags; +}; + +/* This is not struct vlan_hdr. 
*/ +struct nft_payload_vlan_hdr { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; +}; + +static bool +nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len, + int *vlan_hlen) +{ + struct nft_payload_vlan_hdr *vlanh; + __be16 vlan_proto; + u16 vlan_tci; + + if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) { + *vlan_hlen = VLAN_HLEN; + return true; + } + + switch (offset) { + case offsetof(struct vlan_ethhdr, h_vlan_proto): + if (len == 2) { + vlan_proto = nft_reg_load_be16(src); + skb->vlan_proto = vlan_proto; + } else if (len == 4) { + vlanh = (struct nft_payload_vlan_hdr *)src; + __vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto, + ntohs(vlanh->h_vlan_TCI)); + } else { + return false; + } + break; + case offsetof(struct vlan_ethhdr, h_vlan_TCI): + if (len != 2) + return false; + + vlan_tci = ntohs(nft_reg_load_be16(src)); + skb->vlan_tci = vlan_tci; + break; + default: + return false; + } + + return true; +} + static void nft_payload_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_payload_set *priv = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; const u32 *src = ®s->data[priv->sreg]; - int offset, csum_offset; + int offset, csum_offset, vlan_hlen = 0; + struct sk_buff *skb = pkt->skb; __wsum fsum, tsum; switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: if (!skb_mac_header_was_set(skb)) goto err; - offset = skb_mac_header(skb) - skb->data; + + if (skb_vlan_tag_present(skb) && + nft_payload_need_vlan_adjust(priv->offset, priv->len)) { + if (!nft_payload_set_vlan(src, skb, + priv->offset, priv->len, + &vlan_hlen)) + goto err; + + if (!vlan_hlen) + return; + } + + offset = skb_mac_header(skb) - skb->data - vlan_hlen; break; case NFT_PAYLOAD_NETWORK_HEADER: offset = skb_network_offset(skb); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 58eca26162..2299ced939 100644 --- a/net/netfilter/nft_set_pipapo.c +++ 
b/net/netfilter/nft_set_pipapo.c @@ -1994,6 +1994,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = rules_f0; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; + if (!pipapo_match_field(f, start, rules_fx, match_start, match_end)) break; @@ -2006,16 +2008,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); - } - if (i == m->field_count) { - priv->dirty = true; - pipapo_drop(m, rulemap); - return; + if (last && f->mt[rulemap[i].to].e == e) { + priv->dirty = true; + pipapo_drop(m, rulemap); + return; + } } first_rule += rules_f0; } + + WARN_ON_ONCE(1); /* elem_priv not found */ } /** diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 82df02695b..216445dd44 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk) if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { if (!test_and_set_bit(NETLINK_S_CONGESTED, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; + WRITE_ONCE(sk->sk_err, ENOBUFS); sk_error_report(sk); } } @@ -1591,7 +1591,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) goto out; } - sk->sk_err = p->code; + WRITE_ONCE(sk->sk_err, p->code); sk_error_report(sk); out: return ret; @@ -1935,7 +1935,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags & MSG_DONTWAIT; - size_t copied; + size_t copied, max_recvmsg_len; struct sk_buff *skb, *data_skb; int err, ret; @@ -1968,9 +1968,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, #endif /* Record the max length of recvmsg() calls for future allocations */ - nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); - nlk->max_recvmsg_len = 
min_t(size_t, nlk->max_recvmsg_len, - SKB_WITH_OVERHEAD(32768)); + max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); + max_recvmsg_len = min_t(size_t, max_recvmsg_len, + SKB_WITH_OVERHEAD(32768)); + WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); copied = data_skb->len; if (len < copied) { @@ -2005,7 +2006,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = -ret; + WRITE_ONCE(sk->sk_err, -ret); sk_error_report(sk); } } @@ -2219,6 +2220,7 @@ static int netlink_dump(struct sock *sk) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; int alloc_min_size; @@ -2241,8 +2243,9 @@ static int netlink_dump(struct sock *sk) cb = &nlk->cb; alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (alloc_min_size < nlk->max_recvmsg_len) { - alloc_size = nlk->max_recvmsg_len; + max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len); + if (alloc_min_size < max_recvmsg_len) { + alloc_size = max_recvmsg_len; skb = alloc_skb(alloc_size, (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN | __GFP_NORETRY); @@ -2439,7 +2442,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) { - NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS); sk_error_report(NETLINK_CB(in_skb).sk); return; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 983c5ad972..dc39ae20c6 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -285,22 +285,14 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, return 0; } -static inline void __nr_remove_node(struct nr_node *nr_node) +static void nr_remove_node_locked(struct nr_node *nr_node) { + 
lockdep_assert_held(&nr_node_list_lock); + hlist_del_init(&nr_node->node_node); nr_node_put(nr_node); } -#define nr_remove_node_locked(__node) \ - __nr_remove_node(__node) - -static void nr_remove_node(struct nr_node *nr_node) -{ - spin_lock_bh(&nr_node_list_lock); - __nr_remove_node(nr_node); - spin_unlock_bh(&nr_node_list_lock); -} - static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh) { hlist_del_init(&nr_neigh->neigh_node); @@ -339,6 +331,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n return -EINVAL; } + spin_lock_bh(&nr_node_list_lock); nr_node_lock(nr_node); for (i = 0; i < nr_node->count; i++) { if (nr_node->routes[i].neighbour == nr_neigh) { @@ -352,7 +345,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n nr_node->count--; if (nr_node->count == 0) { - nr_remove_node(nr_node); + nr_remove_node_locked(nr_node); } else { switch (i) { case 0: @@ -367,12 +360,14 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n nr_node_put(nr_node); } nr_node_unlock(nr_node); + spin_unlock_bh(&nr_node_list_lock); return 0; } } nr_neigh_put(nr_neigh); nr_node_unlock(nr_node); + spin_unlock_bh(&nr_node_list_lock); nr_node_put(nr_node); return -EINVAL; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 419a1d0ba4..905452006d 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1466,6 +1466,19 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, ndev->ops->n_core_ops); } +static bool nci_valid_size(struct sk_buff *skb) +{ + unsigned int hdr_size = NCI_CTRL_HDR_SIZE; + BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE); + + if (skb->len < hdr_size || + !nci_plen(skb->data) || + skb->len < hdr_size + nci_plen(skb->data)) { + return false; + } + return true; +} + /* ---- NCI TX Data worker thread ---- */ static void nci_tx_work(struct work_struct *work) @@ -1516,6 +1529,11 @@ static void nci_rx_work(struct work_struct *work) 
nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + if (!nci_valid_size(skb)) { + kfree_skb(skb); + continue; + } + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 0f23e5e8e0..3e0fc71d95 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -76,13 +76,15 @@ EXPORT_SYMBOL_GPL(nsh_pop); static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { + unsigned int outer_hlen, mac_len, nsh_len; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; - unsigned int nsh_len, mac_len; - __be16 proto; + __be16 outer_proto, proto; skb_reset_network_header(skb); + outer_proto = skb->protocol; + outer_hlen = skb_mac_header_len(skb); mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) @@ -112,10 +114,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, } for (skb = segs; skb; skb = skb->next) { - skb->protocol = htons(ETH_P_NSH); - __skb_push(skb, nsh_len); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; + skb->protocol = outer_proto; + __skb_push(skb, nsh_len + outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, outer_hlen); skb->mac_len = mac_len; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index aca6e2b599..85af0e9e0a 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -924,6 +924,12 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, pskb_trim(skb, ovs_mac_header_len(key)); } + /* Need to set the pkt_type to involve the routing layer. The + * packet movement through the OVS datapath doesn't generally + * use routing, but this is needed for tunnel cases. 
+ */ + skb->pkt_type = PACKET_OUTGOING; + if (likely(!mru || (skb->len <= mru + vport->dev->hard_header_len))) { ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 7106ce231a..85a338b681 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1704,8 +1704,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) - pr_info_ratelimited("Failed to associated timeout " - "policy `%s'\n", ct_info.timeout); + OVS_NLERR(log, + "Failed to associated timeout policy '%s'", + ct_info.timeout); else ct_info.nf_ct_timeout = rcu_dereference( nf_ct_timeout_find(ct_info.ct)->timeout); @@ -1912,9 +1913,9 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; + struct hlist_node *next; - hlist_for_each_entry_rcu(ct_limit, head, hlist_node, - lockdep_ovsl_is_held()) + hlist_for_each_entry_safe(ct_limit, next, head, hlist_node) kfree_rcu(ct_limit, rcu); } kfree(info->limits); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1b81d71bac..209b42cf5a 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -417,7 +417,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, */ key->tp.src = htons(icmp->icmp6_type); key->tp.dst = htons(icmp->icmp6_code); - memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -426,6 +425,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, struct nd_msg *nd; int offset; + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); + /* In order to process neighbor discovery options, we need the * entire packet. 
*/ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cffa217fb3..0ab3b09f86 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2490,8 +2490,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); - if (!packet_read_pending(&po->tx_ring)) - complete(&po->skb_completion); + complete(&po->skb_completion); } sock_wfree(skb); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 59aebe2968..dd4c7e9a63 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(1) + nla_total_size(4), GFP_KERNEL); if (skb == NULL) goto errout; diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 4a13b9f7ab..3c513e7ca2 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -807,6 +807,24 @@ int qrtr_ns_init(void) if (ret < 0) goto err_wq; + /* As the qrtr ns socket owner and creator is the same module, we have + * to decrease the qrtr module reference count to guarantee that it + * remains zero after the ns socket is created, otherwise, executing + * "rmmod" command is unable to make the qrtr module deleted after the + * qrtr module is inserted successfully. + * + * However, the reference count is increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of qrtr socket's proto_ops struct; another is to increment the + * reference count of owner of qrtr proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. 
+ */ + module_put(qrtr_ns.sock->ops->owner); + module_put(qrtr_ns.sock->sk->sk_prot_creator->owner); + return 0; err_wq: @@ -821,6 +839,15 @@ void qrtr_ns_remove(void) { cancel_work_sync(&qrtr_ns.work); destroy_workqueue(qrtr_ns.workqueue); + + /* sock_release() expects the two references that were put during + * qrtr_ns_init(). This function is only called during module remove, + * so try_stop_module() has already set the refcnt to 0. Use + * __module_get() instead of try_module_get() to successfully take two + * references. + */ + __module_get(qrtr_ns.sock->ops->owner); + __module_get(qrtr_ns.sock->sk->sk_prot_creator->owner); sock_release(qrtr_ns.sock); } EXPORT_SYMBOL_GPL(qrtr_ns_remove); diff --git a/net/rds/rdma.c b/net/rds/rdma.c index c29c7a59f2..3df0affff6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, } ret = PTR_ERR(trans_private); /* Trigger connection so that its ready for the next retry */ - if (ret == -ENODEV) + if (ret == -ENODEV && cp) rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index ee9cc0abf9..d5b421072b 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -240,13 +240,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); struct tcf_skbmod_params *p; - struct tc_skbmod opt = { - .index = d->tcf_index, - .refcnt = refcount_read(&d->tcf_refcnt) - ref, - .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - }; + struct tc_skbmod opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + opt.index = d->tcf_index; + opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; p = rcu_dereference_protected(d->skbmod_p, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 
79ee0618d9..c9e4b37e65 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -796,6 +796,16 @@ static void smc_pnet_create_pnetids_list(struct net *net) u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net_device *dev; + /* Newly created netns do not have devices. + * Do not even acquire rtnl. + */ + if (list_empty(&net->dev_base_head)) + return; + + /* Note: This might not be needed, because smc_pnet_netdev_event() + * is also calling smc_pnet_add_base_pnetid() when handling + * NETDEV_UP event. + */ rtnl_lock(); for_each_netdev(net, dev) smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 48b608cb5f..93a7b7061d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1132,17 +1132,11 @@ gss_read_verf(struct rpc_gss_wire_cred *gc, static void gss_free_in_token_pages(struct gssp_in_token *in_token) { - u32 inlen; int i; i = 0; - inlen = in_token->page_len; - while (inlen) { - if (in_token->pages[i]) - put_page(in_token->pages[i]); - inlen -= inlen > PAGE_SIZE ? PAGE_SIZE : inlen; - } - + while (in_token->pages[i]) + put_page(in_token->pages[i++]); kfree(in_token->pages); in_token->pages = NULL; } @@ -1168,7 +1162,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, } pages = DIV_ROUND_UP(inlen, PAGE_SIZE); - in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL); + in_token->pages = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); if (!in_token->pages) { kfree(in_handle->data); return SVC_DENIED; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af1ca707c3..f73d459362 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -398,7 +398,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, clnt->cl_maxproc = version->nrprocs; clnt->cl_prog = args->prognumber ? : program->number; clnt->cl_vers = version->number; - clnt->cl_stats = program->stats; + clnt->cl_stats = args->stats ? 
: program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects); err = -ENOMEM; @@ -677,6 +677,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) .version = clnt->cl_vers, .authflavor = clnt->cl_auth->au_flavor, .cred = clnt->cl_cred, + .stats = clnt->cl_stats, }; return __rpc_clone_client(&args, clnt); } @@ -699,6 +700,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) .version = clnt->cl_vers, .authflavor = flavor, .cred = clnt->cl_cred, + .stats = clnt->cl_stats, }; return __rpc_clone_client(&args, clnt); } @@ -979,6 +981,8 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .version = vers, .authflavor = old->cl_auth->au_flavor, .cred = old->cl_cred, + .stats = old->cl_stats, + .timeout = old->cl_timeout, }; struct rpc_clnt *clnt; int err; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 36a3ad9336..8d5897ed28 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -37,18 +37,37 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) - #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL +/* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_AUTO = -1, /* choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + /* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. 
*/ -struct svc_pool_map svc_pool_map = { + +struct svc_pool_map { + int count; /* How many svc_servs use us */ + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +}; + +static struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; -EXPORT_SYMBOL_GPL(svc_pool_map); static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ @@ -219,10 +238,12 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) /* * Add a reference to the global map of cpus to pools (and - * vice versa). Initialise the map if we're the first user. - * Returns the number of pools. + * vice versa) if pools are in use. + * Initialise the map if we're the first user. + * Returns the number of pools. If this is '1', no reference + * was taken. */ -unsigned int +static unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -232,6 +253,7 @@ svc_pool_map_get(void) if (m->count++) { mutex_unlock(&svc_pool_map_mutex); + WARN_ON_ONCE(m->npools <= 1); return m->npools; } @@ -247,30 +269,36 @@ svc_pool_map_get(void) break; } - if (npools < 0) { + if (npools <= 0) { /* default, or memory allocation failure */ npools = 1; m->mode = SVC_POOL_GLOBAL; } m->npools = npools; + if (npools == 1) + /* service is unpooled, so doesn't hold a reference */ + m->count--; + mutex_unlock(&svc_pool_map_mutex); - return m->npools; + return npools; } -EXPORT_SYMBOL_GPL(svc_pool_map_get); /* - * Drop a reference to the global map of cpus to pools. + * Drop a reference to the global map of cpus to pools, if + * pools were in use, i.e. if npools > 1. * When the last reference is dropped, the map data is * freed; this allows the sysadmin to change the pool * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. 
*/ -void -svc_pool_map_put(void) +static void +svc_pool_map_put(int npools) { struct svc_pool_map *m = &svc_pool_map; + if (npools <= 1) + return; mutex_lock(&svc_pool_map_mutex); if (!--m->count) { @@ -283,7 +311,6 @@ svc_pool_map_put(void) mutex_unlock(&svc_pool_map_mutex); } -EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { @@ -340,21 +367,18 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) struct svc_pool_map *m = &svc_pool_map; unsigned int pidx = 0; - /* - * An uninitialised map happens in a pure client when - * lockd is brought up, so silently treat it the - * same as SVC_POOL_GLOBAL. - */ - if (svc_serv_is_pooled(serv)) { - switch (m->mode) { - case SVC_POOL_PERCPU: - pidx = m->to_pool[cpu]; - break; - case SVC_POOL_PERNODE: - pidx = m->to_pool[cpu_to_node(cpu)]; - break; - } + if (serv->sv_nrpools <= 1) + return serv->sv_pools; + + switch (m->mode) { + case SVC_POOL_PERCPU: + pidx = m->to_pool[cpu]; + break; + case SVC_POOL_PERNODE: + pidx = m->to_pool[cpu_to_node(cpu)]; + break; } + return &serv->sv_pools[pidx % serv->sv_nrpools]; } @@ -424,7 +448,7 @@ __svc_init_bc(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - const struct svc_serv_ops *ops) + int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int vers; @@ -435,13 +459,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return NULL; serv->sv_name = prog->pg_name; serv->sv_program = prog; - serv->sv_nrthreads = 1; + kref_init(&serv->sv_refcnt); serv->sv_stats = prog->pg_stats; if (bufsize > RPCSVC_MAXPAYLOAD) bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? 
bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); - serv->sv_ops = ops; + serv->sv_threadfn = threadfn; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -487,59 +511,56 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return serv; } -struct svc_serv * -svc_create(struct svc_program *prog, unsigned int bufsize, - const struct svc_serv_ops *ops) +/** + * svc_create - Create an RPC service + * @prog: the RPC program the new service will handle + * @bufsize: maximum message size for @prog + * @threadfn: a function to service RPC requests for @prog + * + * Returns an instantiated struct svc_serv object or NULL. + */ +struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, + int (*threadfn)(void *data)) { - return __svc_create(prog, bufsize, /*npools*/1, ops); + return __svc_create(prog, bufsize, 1, threadfn); } EXPORT_SYMBOL_GPL(svc_create); -struct svc_serv * -svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - const struct svc_serv_ops *ops) +/** + * svc_create_pooled - Create an RPC service with pooled threads + * @prog: the RPC program the new service will handle + * @bufsize: maximum message size for @prog + * @threadfn: a function to service RPC requests for @prog + * + * Returns an instantiated struct svc_serv object or NULL. 
+ */ +struct svc_serv *svc_create_pooled(struct svc_program *prog, + unsigned int bufsize, + int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, ops); + serv = __svc_create(prog, bufsize, npools, threadfn); if (!serv) goto out_err; return serv; out_err: - svc_pool_map_put(); + svc_pool_map_put(npools); return NULL; } EXPORT_SYMBOL_GPL(svc_create_pooled); -void svc_shutdown_net(struct svc_serv *serv, struct net *net) -{ - svc_close_net(serv, net); - - if (serv->sv_ops->svo_shutdown) - serv->sv_ops->svo_shutdown(serv, net); -} -EXPORT_SYMBOL_GPL(svc_shutdown_net); - /* * Destroy an RPC service. Should be called with appropriate locking to - * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. + * protect sv_permsocks and sv_tempsocks. */ void -svc_destroy(struct svc_serv *serv) +svc_destroy(struct kref *ref) { - dprintk("svc: svc_destroy(%s, %d)\n", - serv->sv_program->pg_name, - serv->sv_nrthreads); - - if (serv->sv_nrthreads) { - if (--(serv->sv_nrthreads) != 0) { - svc_sock_update_bufs(serv); - return; - } - } else - printk("svc_destroy: no threads for serv=%p!\n", serv); + struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt); + dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name); del_timer_sync(&serv->sv_temptimer); /* @@ -551,8 +572,7 @@ svc_destroy(struct svc_serv *serv) cache_clean_deferred(serv); - if (svc_serv_is_pooled(serv)) - svc_pool_map_put(); + svc_pool_map_put(serv->sv_nrpools); kfree(serv->sv_pools); kfree(serv); @@ -638,7 +658,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) } EXPORT_SYMBOL_GPL(svc_rqst_alloc); -struct svc_rqst * +static struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; @@ -647,14 +667,17 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return ERR_PTR(-ENOMEM); - 
serv->sv_nrthreads++; + svc_get(serv); + spin_lock_bh(&serv->sv_lock); + serv->sv_nrthreads += 1; + spin_unlock_bh(&serv->sv_lock); + spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); spin_unlock_bh(&pool->sp_lock); return rqstp; } -EXPORT_SYMBOL_GPL(svc_prepare_thread); /* * Choose a pool in which to create a new thread, for svc_set_num_threads @@ -728,11 +751,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - __module_get(serv->sv_ops->svo_module); - task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, + task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { - module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); return PTR_ERR(task); } @@ -748,59 +769,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } - -/* destroy old threads */ -static int -svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) -{ - struct task_struct *task; - unsigned int state = serv->sv_nrthreads-1; - - /* destroy old threads */ - do { - task = choose_victim(serv, pool, &state); - if (task == NULL) - break; - send_sig(SIGINT, task, 1); - nrservs++; - } while (nrservs < 0); - - return 0; -} - /* * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Caller must ensure that mutual exclusion between this and * server startup or shutdown. - * - * Destroying threads relies on the service threads filling in - * rqstp->rq_task, which only the nfs ones do. Assumes the serv - * has been created using svc_create_pooled(). - * - * Based on code that used to be in nfsd_svc() but tweaked - * to be pool-aware. 
*/ -int -svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) -{ - if (pool == NULL) { - /* The -1 assumes caller has done a svc_get() */ - nrservs -= (serv->sv_nrthreads-1); - } else { - spin_lock_bh(&pool->sp_lock); - nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); - } - - if (nrservs > 0) - return svc_start_kthreads(serv, pool, nrservs); - if (nrservs < 0) - return svc_signal_kthreads(serv, pool, nrservs); - return 0; -} -EXPORT_SYMBOL_GPL(svc_set_num_threads); /* destroy old threads */ static int @@ -825,11 +800,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) } int -svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { if (pool == NULL) { - /* The -1 assumes caller has done a svc_get() */ - nrservs -= (serv->sv_nrthreads-1); + nrservs -= serv->sv_nrthreads; } else { spin_lock_bh(&pool->sp_lock); nrservs -= pool->sp_nrthreads; @@ -842,7 +816,7 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser return svc_stop_kthreads(serv, pool, nrservs); return 0; } -EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); +EXPORT_SYMBOL_GPL(svc_set_num_threads); /** * svc_rqst_replace_page - Replace one page in rq_pages[] @@ -894,11 +868,14 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); + spin_lock_bh(&serv->sv_lock); + serv->sv_nrthreads -= 1; + spin_unlock_bh(&serv->sv_lock); + svc_sock_update_bufs(serv); + svc_rqst_free(rqstp); - /* Release the server */ - if (serv) - svc_destroy(serv); + svc_put(serv); } EXPORT_SYMBOL_GPL(svc_exit_thread); @@ -1261,11 +1238,9 @@ svc_generic_init_request(struct svc_rqst *rqstp, if (rqstp->rq_proc >= versp->vs_nproc) goto err_bad_proc; rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc]; - if (!procp) - goto err_bad_proc; /* Initialize storage for argp and resp */ - 
memset(rqstp->rq_argp, 0, procp->pc_argsize); + memset(rqstp->rq_argp, 0, procp->pc_argzero); memset(rqstp->rq_resp, 0, procp->pc_ressize); /* Bump per-procedure stats counter */ @@ -1438,7 +1413,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_authorise(rqstp); close_xprt: if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) - svc_close_xprt(rqstp->rq_xprt); + svc_xprt_close(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); return 0; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5ff8f902f1..3cf53e3140 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -267,12 +267,12 @@ void svc_xprt_received(struct svc_xprt *xprt) trace_svc_xprt_received(xprt); /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_enqueue_xprt with: + * 'put', so we need a reference to call svc_xprt_enqueue with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); + svc_xprt_enqueue(xprt); svc_xprt_put(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_received); @@ -286,7 +286,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) svc_xprt_received(new); } -static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, +static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) @@ -322,21 +322,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, return -EPROTONOSUPPORT; } -int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, +/** + * svc_xprt_create - Add a new listener to @serv + * @serv: target RPC service + * @xprt_name: transport class name + * @net: network namespace + * @family: network address family + * @port: listener port + * @flags: SVC_SOCK flags + * @cred: credential to bind to this 
transport + * + * Return values: + * %0: New listener added successfully + * %-EPROTONOSUPPORT: Requested transport type not supported + */ +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) { int err; - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); + err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); + err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); } return err; } -EXPORT_SYMBOL_GPL(svc_create_xprt); +EXPORT_SYMBOL_GPL(svc_xprt_create); /* * Copy the local and remote xprt addresses to the rqstp structure @@ -412,6 +426,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) smp_rmb(); xpt_flags = READ_ONCE(xprt->xpt_flags); + if (xpt_flags & BIT(XPT_BUSY)) + return false; if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) return true; if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { @@ -424,7 +440,12 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) return false; } -void svc_xprt_do_enqueue(struct svc_xprt *xprt) +/** + * svc_xprt_enqueue - Queue a transport on an idle nfsd thread + * @xprt: transport with data pending + * + */ +void svc_xprt_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; @@ -468,19 +489,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) put_cpu(); trace_svc_xprt_do_enqueue(xprt, rqstp); } -EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); - -/* - * Queue up a transport with data pending. If there are idle nfsd - * processes, wake 'em up. 
- * - */ -void svc_xprt_enqueue(struct svc_xprt *xprt) -{ - if (test_bit(XPT_BUSY, &xprt->xpt_flags)) - return; - xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); -} EXPORT_SYMBOL_GPL(svc_xprt_enqueue); /* @@ -692,12 +700,12 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) /* Made progress, don't sleep yet */ continue; - set_current_state(TASK_INTERRUPTIBLE); - if (signalled() || kthread_should_stop()) { + set_current_state(TASK_IDLE); + if (kthread_should_stop()) { set_current_state(TASK_RUNNING); return -EINTR; } - schedule_timeout(msecs_to_jiffies(500)); + freezable_schedule_timeout(msecs_to_jiffies(500)); } rqstp->rq_page_end = &rqstp->rq_pages[pages]; rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */ @@ -728,7 +736,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return false; /* are we freezing? */ @@ -750,18 +758,14 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_xprt) goto out_found; - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... 
- */ - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_IDLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); if (likely(rqst_should_sleep(rqstp))) - time_left = schedule_timeout(timeout); + time_left = freezable_schedule_timeout(timeout); else __set_current_state(TASK_RUNNING); @@ -776,7 +780,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); out_found: @@ -874,7 +878,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); err = -EINTR; - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) goto out; xprt = svc_get_next_xprt(rqstp, timeout); @@ -1070,7 +1074,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt) svc_xprt_put(xprt); } -void svc_close_xprt(struct svc_xprt *xprt) +/** + * svc_xprt_close - Close a client connection + * @xprt: transport to disconnect + * + */ +void svc_xprt_close(struct svc_xprt *xprt) { trace_svc_xprt_close(xprt); set_bit(XPT_CLOSE, &xprt->xpt_flags); @@ -1085,7 +1094,7 @@ void svc_close_xprt(struct svc_xprt *xprt) */ svc_delete_xprt(xprt); } -EXPORT_SYMBOL_GPL(svc_close_xprt); +EXPORT_SYMBOL_GPL(svc_xprt_close); static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { @@ -1137,7 +1146,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) } } -/* +/** + * svc_xprt_destroy_all - Destroy transports associated with @serv + * @serv: RPC service to be shut down + * @net: target network namespace + * * Server threads may still be running (especially in the case where the * service is still running in other network namespaces). 
* @@ -1149,7 +1162,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) * threads, we may need to wait a little while and then check again to * see if they're done. */ -void svc_close_net(struct svc_serv *serv, struct net *net) +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) { int delay = 0; @@ -1160,6 +1173,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net) msleep(delay++); } } +EXPORT_SYMBOL_GPL(svc_xprt_destroy_all); /* * Handle defer and revisit of requests diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index be7081284a..112236dd72 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1334,25 +1334,10 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } -bool svc_alien_sock(struct net *net, int fd) -{ - int err; - struct socket *sock = sockfd_lookup(fd, &err); - bool ret = false; - - if (!sock) - goto out; - if (sock_net(sock->sk) != net) - ret = true; - sockfd_put(sock); -out: - return ret; -} -EXPORT_SYMBOL_GPL(svc_alien_sock); - /** * svc_addsock - add a listener socket to an RPC service * @serv: pointer to RPC service to which to add a new listener + * @net: caller's network namespace * @fd: file descriptor of the new listener * @name_return: pointer to buffer to fill in with name of listener * @len: size of the buffer @@ -1362,8 +1347,8 @@ EXPORT_SYMBOL_GPL(svc_alien_sock); * Name is terminated with '\n'. On error, returns a negative errno * value. 
*/ -int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, - const size_t len, const struct cred *cred) +int svc_addsock(struct svc_serv *serv, struct net *net, const int fd, + char *name_return, const size_t len, const struct cred *cred) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1374,6 +1359,9 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, if (!so) return err; + err = -EINVAL; + if (sock_net(so->sk) != net) + goto out; err = -EAFNOSUPPORT; if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) goto out; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index f0a0a4ad6d..b227d0c847 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -918,6 +918,28 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, } EXPORT_SYMBOL_GPL(xdr_init_encode); +/** + * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages + * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer into which to encode data + * @pages: list of pages to decode into + * @rqst: pointer to controlling rpc_rqst, for debugging + * + */ +void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst) +{ + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->page_ptr = pages; + xdr->iov = NULL; + xdr->p = page_address(*pages); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); + xdr->rqst = rqst; +} +EXPORT_SYMBOL_GPL(xdr_init_encode_pages); + /** * xdr_commit_encode - Ensure all data is written to buffer * @xdr: pointer to xdr_stream diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 16897fcb65..85c8cdda98 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) ret = rpcrdma_bc_send_request(rdma, rqst); if (ret == 
-ENOTCONN) - svc_close_xprt(sxprt); + svc_xprt_close(sxprt); return ret; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 41095a278f..34413d4ab0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -258,7 +258,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: pr_info("rpcrdma: removing device %s for %pISpc\n", ep->re_id->device->name, sap); - fallthrough; + switch (xchg(&ep->re_connect_status, -ENODEV)) { + case 0: goto wake_connect_worker; + case 1: goto disconnected; + } + return 0; case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; goto disconnected; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5c9fd4791c..76284fc538 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - *buf = NULL; if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err; + *buf = NULL; frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; @@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (!head) goto err; + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. 
+ */ + *buf = NULL; if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { @@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *headbuf = NULL; return 1; } - *buf = NULL; return 0; err: kfree_skb(*buf); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 79ea1ab345..4a3bf8528d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -636,9 +636,17 @@ struct tls_context *tls_ctx_create(struct sock *sk) return NULL; mutex_init(&ctx->tx_lock); - rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); ctx->sk = sk; + /* Release semantic of rcu_assign_pointer() ensures that + * ctx->sk_proto is visible before changing sk->sk_prot in + * update_sk_prot(), and prevents reading uninitialized value in + * tls_{getsockopt, setsockopt}. Note that we do not need a + * read barrier in tls_{getsockopt,setsockopt} as there is an + * address dependency between sk->sk_proto->{getsockopt,setsockopt} + * and ctx->sk_proto. + */ + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); return ctx; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fc55b65695..90f6cbe5cd 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -174,7 +174,17 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) struct scatterlist *sg; struct sk_buff *skb; unsigned int pages; - int pending; + + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). 
+ */ + if (err == -EINPROGRESS) + return; skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); @@ -221,12 +231,17 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - pending = atomic_dec_return(&ctx->decrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); +} + +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + if (!atomic_dec_and_test(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); + + return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, @@ -260,6 +275,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, skb); + BUILD_BUG_ON_INVALID(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -268,6 +284,10 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + ret = ret ?: -EINPROGRESS; + } if (ret == -EINPROGRESS) { if (darg->async) return 0; @@ -449,7 +469,9 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) struct sk_msg *msg_en; struct tls_rec *rec; bool ready = false; - int pending; + + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -484,12 +506,8 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) ready = true; } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) 
complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -499,6 +517,15 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) schedule_delayed_work(&ctx->tx_work.work, 1); } +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + if (!atomic_dec_and_test(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->encrypt_pending); + + return ctx->async_wait.err; +} + static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, @@ -538,9 +565,14 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + BUILD_BUG_ON_INVALID(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; @@ -949,7 +981,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; - int pending; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT)) @@ -1118,24 +1149,12 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = 
ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1913,31 +1932,16 @@ int tls_sw_recvmsg(struct sock *sk, recv_end: if (async) { - int pending; - /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - if (pending) { - err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - if (err) { - /* one of async decrypt failed */ - tls_err_abort(sk, err); - copied = 0; - decrypted = 0; - goto end; - } - } else { - reinit_completion(&ctx->async_wait.completion); + err = tls_decrypt_async_wait(ctx); + if (err) { + /* one of async decrypt failed */ + tls_err_abort(sk, err); + copied = 0; + decrypted = 0; + goto end; } - /* There can be no concurrent accesses, since we have no - * pending decrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, @@ -2154,16 +2158,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); @@ -2301,6 +2298,46 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) strp_check_rcv(&rx_ctx->strp); } +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if 
(!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); + skb_queue_head_init(&sw_ctx_rx->rx_list); + + return sw_ctx_rx; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2327,46 +2364,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - 
sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); aead = &sw_ctx_rx->aead_recv; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 265dc665c9..80f91b5ab4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -877,11 +877,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ @@ -2005,13 +2005,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other maybe_add_creds(skb, sock, other); skb_get(skb); + scm_stat_add(other, skb); + + spin_lock(&other->sk_receive_queue.lock); if (ousk->oob_skb) consume_skb(ousk->oob_skb); - WRITE_ONCE(ousk->oob_skb, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); - scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); sk_send_sigurg(other); unix_state_unlock(other); other->sk_data_ready(other); @@ -2057,7 +2059,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out_err; } - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto pipe_err; while (sent < len) { @@ -2516,8 +2518,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_lock(&u->iolock); unix_state_lock(sk); + 
spin_lock(&sk->sk_receive_queue.lock); if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); mutex_unlock(&u->iolock); return -EINVAL; @@ -2529,6 +2533,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) WRITE_ONCE(u->oob_skb, NULL); else skb_get(oob_skb); + + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); @@ -2557,6 +2563,10 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, consume_skb(skb); skb = NULL; } else { + struct sk_buff *unlinked_skb = NULL; + + spin_lock(&sk->sk_receive_queue.lock); + if (skb == u->oob_skb) { if (copied) { skb = NULL; @@ -2565,12 +2575,22 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } - } else if (!(flags & MSG_PEEK)) { - skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + } else if (flags & MSG_PEEK) { + skb = NULL; + } else { + __skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + unlinked_skb = skb; skb = skb_peek(&sk->sk_receive_queue); } } + + spin_unlock(&sk->sk_receive_queue.lock); + + if (unlinked_skb) { + WARN_ON_ONCE(skb_unref(unlinked_skb)); + kfree_skb(unlinked_skb); + } } return skb; } @@ -2642,18 +2662,16 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? 
last->len : 0; +again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); - if (!skb) { + if (!skb && copied) { unix_state_unlock(sk); - if (copied) - break; - goto redo; + break; } } #endif -again: if (skb == NULL) { if (copied >= target) goto unlock; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9bfffe2a7f..d2fc795394 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -234,20 +235,34 @@ void unix_gc(void) * receive queues. Other, non candidate sockets _can_ be * added to queue, so we must make sure only to touch * candidates. + * + * Embryos, though never candidates themselves, affect which + * candidates are reachable by the garbage collector. Before + * being added to a listener's queue, an embryo may already + * receive data carrying SCM_RIGHTS, potentially making the + * passed socket a candidate that is not yet reachable by the + * collector. It becomes reachable once the embryo is + * enqueued. Therefore, we must ensure that no SCM-laden + * embryo appears in a (candidate) listener's queue between + * consecutive scan_children() calls. 
*/ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + struct sock *sk = &u->sk; long total_refs; - long inflight_refs; - total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); + total_refs = file_count(sk->sk_socket->file); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + + if (sk->sk_state == TCP_LISTEN) { + unix_state_lock_nested(sk, U_LOCK_GC_LISTENER); + unix_state_unlock(sk); + } } } @@ -271,7 +286,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, &not_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index d1048b4c2b..4eff7da9f6 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -52,12 +52,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -74,10 +75,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if (atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() 
*/ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 99149b10f8..d758ec5655 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12890,6 +12890,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) error: for (i = 0; i < new_coalesce.n_rules; i++) { tmp_rule = &new_coalesce.rules[i]; + if (!tmp_rule) + continue; for (j = 0; j < tmp_rule->n_patterns; j++) kfree(tmp_rule->patterns[j].mask); kfree(tmp_rule->patterns); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 19b78d4722..9467193424 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -963,7 +963,7 @@ TRACE_EVENT(rdev_get_mpp, TRACE_EVENT(rdev_dump_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *mpp), - TP_ARGS(wiphy, netdev, _idx, mpp, dst), + TP_ARGS(wiphy, netdev, _idx, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY @@ -1687,7 +1687,7 @@ TRACE_EVENT(rdev_return_void_tx_rx, DECLARE_EVENT_CLASS(tx_rx_evt, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, rx, tx), + TP_ARGS(wiphy, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, tx) @@ -1704,7 +1704,7 @@ DECLARE_EVENT_CLASS(tx_rx_evt, DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, rx, tx) + TP_ARGS(wiphy, tx, rx) ); DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index e5eb5616be..1f61d15b3d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1135,6 +1135,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, struct xsk_queue **q; int entries; + if (optlen < sizeof(entries)) + return -EINVAL; if (copy_from_sockptr(&entries, optval, sizeof(entries))) return -EFAULT; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index a686183271..7f326a01cb 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -400,11 +400,15 @@ static int 
xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); @@ -415,11 +419,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index eebca0cbc6..cee851fbe2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3766,15 +3766,10 @@ static void xfrm_link_failure(struct sk_buff *skb) /* Impossible. Such dst must be popped before reaches point of failure. 
*/ } -static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) +static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { - if (dst) { - if (dst->obsolete) { - dst_release(dst); - dst = NULL; - } - } - return dst; + if (dst->obsolete) + sk_dst_reset(sk); } static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ff56b6a016..d65f781f7a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1850,6 +1850,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) if (xp->xfrm_nr == 0) return 0; + if (xp->xfrm_nr > XFRM_MAX_DEPTH) + return -ENOBUFS; + for (i = 0; i < xp->xfrm_nr; i++) { struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index f182700e0a..6881a0c96b 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -53,6 +53,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict) +KBUILD_CFLAGS += -Wno-enum-compare-conditional +KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 47f0474582..dce4cf55a4 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -23,7 +23,7 @@ modname = $(notdir $(@:.mod.o=)) part-of-module = y quiet_cmd_cc_o_c = CC [M] $@ - cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $< + cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV) $(CFLAGS_KCSAN), $(c_flags)) -c -o $@ $< %.mod.o: %.mod.c FORCE $(call if_changed_dep,cc_o_c) diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index b04aa8e91a..e5c63f806e 100644 --- 
a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -463,6 +463,12 @@ static bool stackleak_gate(void) return false; if (STRING_EQUAL(section, ".meminit.text")) return false; + if (STRING_EQUAL(section, ".noinstr.text")) + return false; + if (STRING_EQUAL(section, ".entry.text")) + return false; + if (STRING_EQUAL(section, ".head.text")) + return false; } return track_frame_size >= 0; diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 08f0587d15..0ff707bc18 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -46,12 +46,12 @@ if IS_BUILTIN(CONFIG_COMMON_CLK): LX_GDBPARSED(CLK_GET_RATE_NOCACHE) /* linux/fs.h */ -LX_VALUE(SB_RDONLY) -LX_VALUE(SB_SYNCHRONOUS) -LX_VALUE(SB_MANDLOCK) -LX_VALUE(SB_DIRSYNC) -LX_VALUE(SB_NOATIME) -LX_VALUE(SB_NODIRATIME) +LX_GDBPARSED(SB_RDONLY) +LX_GDBPARSED(SB_SYNCHRONOUS) +LX_GDBPARSED(SB_MANDLOCK) +LX_GDBPARSED(SB_DIRSYNC) +LX_GDBPARSED(SB_NOATIME) +LX_GDBPARSED(SB_NODIRATIME) /* linux/htimer.h */ LX_GDBPARSED(hrtimer_resolution) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 7f8013dcef..f9786621a1 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -13,18 +13,21 @@ struct symbol symbol_yes = { .name = "y", + .type = S_TRISTATE, .curr = { "y", yes }, .flags = SYMBOL_CONST|SYMBOL_VALID, }; struct symbol symbol_mod = { .name = "m", + .type = S_TRISTATE, .curr = { "m", mod }, .flags = SYMBOL_CONST|SYMBOL_VALID, }; struct symbol symbol_no = { .name = "n", + .type = S_TRISTATE, .curr = { "n", no }, .flags = SYMBOL_CONST|SYMBOL_VALID, }; @@ -775,8 +778,7 @@ const char *sym_get_string_value(struct symbol *sym) case no: return "n"; case mod: - sym_calc_value(modules_sym); - return (modules_sym->curr.tri == no) ? 
"n" : "m"; + return "m"; case yes: return "y"; } diff --git a/security/keys/key.c b/security/keys/key.c index e65240641c..f2a84d86ea 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -464,7 +464,8 @@ static int __key_instantiate_and_link(struct key *key, if (authkey) key_invalidate(authkey); - key_set_expiry(key, prep->expiry); + if (prep->expiry != TIME64_MAX) + key_set_expiry(key, prep->expiry); } } diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index bc700f85f8..ea277c55a3 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -38,6 +38,7 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, u8 *end_work = scratch + SCRATCH_SIZE; u8 *priv, *pub; u16 priv_len, pub_len; + int ret; priv_len = get_unaligned_be16(src) + 2; priv = src; @@ -57,8 +58,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, unsigned char bool[3], *w = bool; /* tag 0 is emptyAuth */ w = asn1_encode_boolean(w, w + sizeof(bool), true); - if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) - return PTR_ERR(w); + if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) { + ret = PTR_ERR(w); + goto err; + } work = asn1_encode_tag(work, end_work, 0, bool, w - bool); } @@ -69,8 +72,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, * trigger, so if it does there's something nefarious going on */ if (WARN(work - scratch + pub_len + priv_len + 14 > SCRATCH_SIZE, - "BUG: scratch buffer is too small")) - return -EINVAL; + "BUG: scratch buffer is too small")) { + ret = -EINVAL; + goto err; + } work = asn1_encode_integer(work, end_work, options->keyhandle); work = asn1_encode_octet_string(work, end_work, pub, pub_len); @@ -79,10 +84,18 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, work1 = payload->blob; work1 = asn1_encode_sequence(work1, work1 + sizeof(payload->blob), scratch, work - scratch); - if (WARN(IS_ERR(work1), 
"BUG: ASN.1 encoder failed")) - return PTR_ERR(work1); + if (IS_ERR(work1)) { + ret = PTR_ERR(work1); + pr_err("BUG: ASN.1 encoder failed with %d\n", ret); + goto err; + } + kfree(scratch); return work1 - payload->blob; + +err: + kfree(scratch); + return ret; } struct tpm2_key_context { diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 507d43827a..229a6918b5 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -32,6 +32,18 @@ #include "ruleset.h" #include "setup.h" +static bool is_initialized(void) +{ + if (likely(landlock_initialized)) + return true; + + pr_warn_once( + "Disabled but requested by user space. " + "You should enable Landlock at boot time: " + "https://docs.kernel.org/userspace-api/landlock.html#boot-time-configuration\n"); + return false; +} + /** * copy_min_struct_from_user - Safe future-proof argument copying * @@ -165,7 +177,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset, /* Build-time checks. */ build_check_abi(); - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; if (flags) { @@ -311,7 +323,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, struct landlock_ruleset *ruleset; int res, err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* No flag for now. 
*/ @@ -402,7 +414,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, struct landlock_cred_security *new_llcred; int err; - if (!landlock_initialized) + if (!is_initialized()) return -EOPNOTSUPP; /* diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index a5a78aef64..c6f211758f 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1264,7 +1264,8 @@ static int smack_inode_setxattr(struct user_namespace *mnt_userns, check_star = 1; } else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) { check_priv = 1; - if (size != TRANS_TRUE_SIZE || + if (!S_ISDIR(d_backing_inode(dentry)->i_mode) || + size != TRANS_TRUE_SIZE || strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) rc = -EINVAL; } else @@ -2720,6 +2721,15 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, if (value == NULL || size > SMK_LONGLABEL || size == 0) return -EINVAL; + if (strcmp(name, XATTR_SMACK_TRANSMUTE) == 0) { + if (!S_ISDIR(inode->i_mode) || size != TRANS_TRUE_SIZE || + strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) + return -EINVAL; + + nsp->smk_flags |= SMK_INODE_TRANSMUTE; + return 0; + } + skp = smk_import_entry(value, size); if (IS_ERR(skp)) return PTR_ERR(skp); diff --git a/sound/core/init.c b/sound/core/init.c index 7b3618997d..088f4f1874 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -303,8 +303,8 @@ static int snd_card_init(struct snd_card *card, struct device *parent, card->number = idx; #ifdef MODULE WARN_ON(!module); - card->module = module; #endif + card->module = module; INIT_LIST_HEAD(&card->devices); init_rwsem(&card->controls_rwsem); rwlock_init(&card->ctl_files_rwlock); @@ -508,6 +508,14 @@ int snd_card_disconnect(struct snd_card *card) } spin_unlock(&card->files_lock); +#ifdef CONFIG_PM + /* wake up sleepers here before other callbacks for avoiding potential + * deadlocks with other locks (e.g. 
in kctls); + * then this notifies the shutdown and sleepers would abort immediately + */ + wake_up_all(&card->power_sleep); +#endif + /* notify all connected devices about disconnection */ /* at this point, they cannot respond to any calls except release() */ @@ -535,7 +543,6 @@ int snd_card_disconnect(struct snd_card *card) mutex_unlock(&snd_card_mutex); #ifdef CONFIG_PM - wake_up(&card->power_sleep); snd_power_sync_ref(card); #endif return 0; diff --git a/sound/core/timer.c b/sound/core/timer.c index e08a37c23a..38f3b30efa 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -553,6 +553,16 @@ static int snd_timer_start1(struct snd_timer_instance *timeri, goto unlock; } + /* check the actual time for the start tick; + * bail out as error if it's way too low (< 100us) + */ + if (start) { + if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) { + result = -EINVAL; + goto unlock; + } + } + if (start) timeri->ticks = timeri->cticks = ticks; else if (!timeri->cticks) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index f8b644cb91..8753125683 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -771,10 +771,14 @@ static int check_cip_header(struct amdtp_stream *s, const __be32 *buf, } else { unsigned int dbc_interval; - if (*data_blocks > 0 && s->ctx_data.tx.dbc_interval > 0) - dbc_interval = s->ctx_data.tx.dbc_interval; - else - dbc_interval = *data_blocks; + if (!(s->flags & CIP_DBC_IS_PAYLOAD_QUADLETS)) { + if (*data_blocks > 0 && s->ctx_data.tx.dbc_interval > 0) + dbc_interval = s->ctx_data.tx.dbc_interval; + else + dbc_interval = *data_blocks; + } else { + dbc_interval = payload_length / sizeof(__be32); + } lost = dbc != ((*data_block_counter + dbc_interval) & 0xff); } diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index 1f957c946c..cf9ab34727 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -37,6 +37,9 @@ * the value of current 
SYT_INTERVAL; e.g. initial value is not zero. * @CIP_UNAWARE_SYT: For outgoing packet, the value in SYT field of CIP is 0xffff. * For incoming packet, the value in SYT field of CIP is not handled. + * @CIP_DBC_IS_PAYLOAD_QUADLETS: Available for incoming packet, and only effective with + * CIP_DBC_IS_END_EVENT flag. The value of dbc field is the number of accumulated quadlets + * in CIP payload, instead of the number of accumulated data blocks. */ enum cip_flags { CIP_NONBLOCKING = 0x00, @@ -51,6 +54,7 @@ enum cip_flags { CIP_NO_HEADER = 0x100, CIP_UNALIGHED_DBC = 0x200, CIP_UNAWARE_SYT = 0x400, + CIP_DBC_IS_PAYLOAD_QUADLETS = 0x800, }; /** diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index b7758dbe23..7c1e47aa4e 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -41,6 +41,8 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, int i) "intel-quirk-mask", &quirk_mask); + fwnode_handle_put(link); + if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) return false; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 200d0b953d..c7529aa13f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9131,6 +9131,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), @@ -9270,7 +9271,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x124c, "Intel 
Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), - SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), + SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), @@ -10783,8 +10784,7 @@ static void alc897_hp_automute_hook(struct hda_codec *codec, snd_hda_gen_hp_automute(codec, jack); vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP; - snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - vref); + snd_hda_set_pin_ctl(codec, 0x1b, vref); } static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, @@ -10793,6 +10793,10 @@ static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->gen.hp_automute_hook = alc897_hp_automute_hook; + spec->no_shutup_pins = 1; + } + if (action == HDA_FIXUP_ACT_PROBE) { + snd_hda_set_pin_ctl_cache(codec, 0x1a, PIN_IN | AC_PINCTL_VREF_100); } } diff --git a/sound/sh/aica.c b/sound/sh/aica.c index 6e9d6bd673..8b47bfcd90 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -278,7 +278,8 @@ static void run_spu_dma(struct work_struct *work) dreamcastcard->clicks++; if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER)) dreamcastcard->clicks %= AICA_PERIOD_NUMBER; - mod_timer(&dreamcastcard->timer, jiffies + 1); + if (snd_pcm_running(dreamcastcard->substream)) + mod_timer(&dreamcastcard->timer, jiffies + 1); } } @@ -290,6 +291,8 @@ static void aica_period_elapsed(struct timer_list *t) /*timer function - so 
cannot sleep */ int play_period; struct snd_pcm_runtime *runtime; + if (!snd_pcm_running(substream)) + return; runtime = substream->runtime; dreamcastcard = substream->pcm->private_data; /* Have we played out an additional period? */ @@ -350,12 +353,19 @@ static int snd_aicapcm_pcm_open(struct snd_pcm_substream return 0; } +static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct snd_card_aica *dreamcastcard = substream->pcm->private_data; + + del_timer_sync(&dreamcastcard->timer); + cancel_work_sync(&dreamcastcard->spu_dma_work); + return 0; +} + static int snd_aicapcm_pcm_close(struct snd_pcm_substream *substream) { struct snd_card_aica *dreamcastcard = substream->pcm->private_data; - flush_work(&(dreamcastcard->spu_dma_work)); - del_timer(&dreamcastcard->timer); dreamcastcard->substream = NULL; kfree(dreamcastcard->channel); spu_disable(); @@ -401,6 +411,7 @@ static const struct snd_pcm_ops snd_aicapcm_playback_ops = { .prepare = snd_aicapcm_pcm_prepare, .trigger = snd_aicapcm_pcm_trigger, .pointer = snd_aicapcm_pcm_pointer, + .sync_stop = snd_aicapcm_pcm_sync_stop, }; /* TO DO: set up to handle more than one pcm instance */ diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 4dc6eed6c1..99676c426f 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -629,8 +629,10 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) return NULL; aad_pdata = devm_kzalloc(dev, sizeof(*aad_pdata), GFP_KERNEL); - if (!aad_pdata) + if (!aad_pdata) { + fwnode_handle_put(aad_np); return NULL; + } aad_pdata->irq = i2c->irq; @@ -705,6 +707,8 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) else aad_pdata->adc_1bit_rpt = DA7219_AAD_ADC_1BIT_RPT_1; + fwnode_handle_put(aad_np); + return aad_pdata; } diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 2cc3d814ba..5a44f52015 100644 --- a/sound/soc/codecs/rt5645.c +++ 
b/sound/soc/codecs/rt5645.c @@ -441,6 +441,7 @@ struct rt5645_priv { struct regmap *regmap; struct i2c_client *i2c; struct gpio_desc *gpiod_hp_det; + struct gpio_desc *gpiod_cbj_sleeve; struct snd_soc_jack *hp_jack; struct snd_soc_jack *mic_jack; struct snd_soc_jack *btn_jack; @@ -3179,6 +3180,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse regmap_update_bits(rt5645->regmap, RT5645_IN1_CTRL2, RT5645_CBJ_MN_JD, 0); + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 1); + msleep(600); regmap_read(rt5645->regmap, RT5645_IN1_CTRL3, &val); val &= 0x7; @@ -3195,6 +3199,8 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse snd_soc_dapm_disable_pin(dapm, "Mic Det Power"); snd_soc_dapm_sync(dapm); rt5645->jack_type = SND_JACK_HEADPHONE; + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, @@ -3220,6 +3226,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } return rt5645->jack_type; @@ -3933,6 +3942,16 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, return ret; } + rt5645->gpiod_cbj_sleeve = devm_gpiod_get_optional(&i2c->dev, "cbj-sleeve", + GPIOD_OUT_LOW); + + if (IS_ERR(rt5645->gpiod_cbj_sleeve)) { + ret = PTR_ERR(rt5645->gpiod_cbj_sleeve); + dev_info(&i2c->dev, "failed to initialize gpiod, ret=%d\n", ret); + if (ret != -ENOENT) + return ret; + } + for (i = 0; i < ARRAY_SIZE(rt5645->supplies); i++) rt5645->supplies[i].supply = rt5645_supply_names[i]; @@ -4176,6 +4195,9 @@ static int rt5645_i2c_remove(struct i2c_client *i2c) cancel_delayed_work_sync(&rt5645->jack_detect_work); 
cancel_delayed_work_sync(&rt5645->rcclock_work); + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); + regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies); return 0; @@ -4193,6 +4215,9 @@ static void rt5645_i2c_shutdown(struct i2c_client *i2c) 0); msleep(20); regmap_write(rt5645->regmap, RT5645_RESET, 0); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } static struct i2c_driver rt5645_i2c_driver = { diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c index 9fdd9afe00..f452245b21 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c @@ -787,12 +787,12 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt5682->disable_irq_lock); if (rt5682->disable_irq == true) { - mutex_lock(&rt5682->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt5682->disable_irq = false; - mutex_unlock(&rt5682->disable_irq_lock); } + mutex_unlock(&rt5682->disable_irq_lock); goto regmap_sync; } diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 4faf6b8544..25e8b9906f 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -443,13 +443,13 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 9545b8a7eb..af7a0ab566 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ 
b/sound/soc/codecs/rt711-sdw.c @@ -542,12 +542,12 @@ static int __maybe_unused rt711_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c index bfa536bd71..7c8d6a012f 100644 --- a/sound/soc/codecs/rt715-sdca.c +++ b/sound/soc/codecs/rt715-sdca.c @@ -315,7 +315,7 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol, return 0; } -static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -17625, 375, 0); +static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1725, 75, 0); static const DECLARE_TLV_DB_SCALE(mic_vol_tlv, 0, 1000, 0); static int rt715_sdca_get_volsw(struct snd_kcontrol *kcontrol, @@ -476,7 +476,7 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC7_27_VOL, RT715_SDCA_FU_VOL_CTRL, CH_02), - 0x2f, 0x7f, 0, + 0x2f, 0x3f, 0, rt715_sdca_set_amp_gain_get, rt715_sdca_set_amp_gain_put, in_vol_tlv), RT715_SDCA_EXT_TLV("FU02 Capture Volume", @@ -484,13 +484,13 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), RT715_SDCA_EXT_TLV("FU06 Capture Volume", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC10_11_VOL, RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), /* MIC Boost Control */ RT715_SDCA_BOOST_EXT_TLV("FU0E Boost", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_DMIC_GAIN_EN, diff --git 
a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index b047bf87a1..e269026942 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -111,6 +111,7 @@ static bool rt715_readable_register(struct device *dev, unsigned int reg) case 0x839d: case 0x83a7: case 0x83a9: + case 0x752001: case 0x752039: return true; default: diff --git a/sound/soc/codecs/tas2552.c b/sound/soc/codecs/tas2552.c index 700baa6314..ba36525a57 100644 --- a/sound/soc/codecs/tas2552.c +++ b/sound/soc/codecs/tas2552.c @@ -2,7 +2,8 @@ /* * tas2552.c - ALSA SoC Texas Instruments TAS2552 Mono Audio Amplifier * - * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com + * Copyright (C) 2014 - 2024 Texas Instruments Incorporated - + * https://www.ti.com * * Author: Dan Murphy */ @@ -119,12 +120,14 @@ static const struct snd_soc_dapm_widget tas2552_dapm_widgets[] = &tas2552_input_mux_control), SND_SOC_DAPM_AIF_IN("DAC IN", "DAC Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("ASI OUT", "DAC Capture", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_DAC("DAC", NULL, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_OUT_DRV("ClassD", TAS2552_CFG_2, 7, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("PLL", TAS2552_CFG_2, 3, 0, NULL, 0), SND_SOC_DAPM_POST("Post Event", tas2552_post_event), - SND_SOC_DAPM_OUTPUT("OUT") + SND_SOC_DAPM_OUTPUT("OUT"), + SND_SOC_DAPM_INPUT("DMIC") }; static const struct snd_soc_dapm_route tas2552_audio_map[] = { @@ -134,6 +137,7 @@ static const struct snd_soc_dapm_route tas2552_audio_map[] = { {"ClassD", NULL, "Input selection"}, {"OUT", NULL, "ClassD"}, {"ClassD", NULL, "PLL"}, + {"ASI OUT", NULL, "DMIC"} }; #ifdef CONFIG_PM @@ -538,6 +542,13 @@ static struct snd_soc_dai_driver tas2552_dai[] = { .rates = SNDRV_PCM_RATE_8000_192000, .formats = TAS2552_FORMATS, }, + .capture = { + .stream_name = "Capture", + .channels_min = 2, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_8000_192000, + .formats = TAS2552_FORMATS, + }, .ops = &tas2552_speaker_dai_ops, }, }; 
diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c index e49c64f54a..5bc2f8c82f 100644 --- a/sound/soc/intel/boards/bxt_da7219_max98357a.c +++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c @@ -750,6 +750,7 @@ static struct snd_soc_card broxton_audio_card = { .dapm_routes = audio_map, .num_dapm_routes = ARRAY_SIZE(audio_map), .fully_routed = true, + .disable_route_checks = true, .late_probe = bxt_card_late_probe, }; diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c index 0d1df37ece..cd11a40252 100644 --- a/sound/soc/intel/boards/bxt_rt298.c +++ b/sound/soc/intel/boards/bxt_rt298.c @@ -575,6 +575,7 @@ static struct snd_soc_card broxton_rt298 = { .dapm_routes = broxton_rt298_map, .num_dapm_routes = ARRAY_SIZE(broxton_rt298_map), .fully_routed = true, + .disable_route_checks = true, .late_probe = bxt_card_late_probe, }; diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c index 99b3d7642c..5f1eb75048 100644 --- a/sound/soc/intel/boards/glk_rt5682_max98357a.c +++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c @@ -603,6 +603,8 @@ static int geminilake_audio_probe(struct platform_device *pdev) card = &glk_audio_card_rt5682_m98357a; card->dev = &pdev->dev; snd_soc_card_set_drvdata(card, ctx); + if (!snd_soc_acpi_sof_parent(&pdev->dev)) + card->disable_route_checks = true; /* override platform name, if required */ mach = pdev->dev.platform_data; diff --git a/sound/soc/intel/boards/kbl_da7219_max98357a.c b/sound/soc/intel/boards/kbl_da7219_max98357a.c index 14b625e947..77f6898b39 100644 --- a/sound/soc/intel/boards/kbl_da7219_max98357a.c +++ b/sound/soc/intel/boards/kbl_da7219_max98357a.c @@ -621,6 +621,7 @@ static struct snd_soc_card kabylake_audio_card_da7219_m98357a = { .dapm_routes = kabylake_map, .num_dapm_routes = ARRAY_SIZE(kabylake_map), .fully_routed = true, + .disable_route_checks = true, .late_probe = 
kabylake_card_late_probe, }; diff --git a/sound/soc/intel/boards/kbl_da7219_max98927.c b/sound/soc/intel/boards/kbl_da7219_max98927.c index 2b43459adc..2c57c9204d 100644 --- a/sound/soc/intel/boards/kbl_da7219_max98927.c +++ b/sound/soc/intel/boards/kbl_da7219_max98927.c @@ -1018,6 +1018,7 @@ static struct snd_soc_card kbl_audio_card_da7219_m98927 = { .codec_conf = max98927_codec_conf, .num_configs = ARRAY_SIZE(max98927_codec_conf), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; @@ -1036,6 +1037,7 @@ static struct snd_soc_card kbl_audio_card_max98927 = { .codec_conf = max98927_codec_conf, .num_configs = ARRAY_SIZE(max98927_codec_conf), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; @@ -1053,6 +1055,7 @@ static struct snd_soc_card kbl_audio_card_da7219_m98373 = { .codec_conf = max98373_codec_conf, .num_configs = ARRAY_SIZE(max98373_codec_conf), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; @@ -1070,6 +1073,7 @@ static struct snd_soc_card kbl_audio_card_max98373 = { .codec_conf = max98373_codec_conf, .num_configs = ARRAY_SIZE(max98373_codec_conf), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; diff --git a/sound/soc/intel/boards/kbl_rt5660.c b/sound/soc/intel/boards/kbl_rt5660.c index 289ca39b82..776a1beaaf 100644 --- a/sound/soc/intel/boards/kbl_rt5660.c +++ b/sound/soc/intel/boards/kbl_rt5660.c @@ -519,6 +519,7 @@ static struct snd_soc_card kabylake_audio_card_rt5660 = { .dapm_routes = kabylake_rt5660_map, .num_dapm_routes = ARRAY_SIZE(kabylake_rt5660_map), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c index a3e040a249..fa7d9cff98 100644 --- a/sound/soc/intel/boards/kbl_rt5663_max98927.c +++ 
b/sound/soc/intel/boards/kbl_rt5663_max98927.c @@ -954,6 +954,7 @@ static struct snd_soc_card kabylake_audio_card_rt5663_m98927 = { .codec_conf = max98927_codec_conf, .num_configs = ARRAY_SIZE(max98927_codec_conf), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; @@ -970,6 +971,7 @@ static struct snd_soc_card kabylake_audio_card_rt5663 = { .dapm_routes = kabylake_5663_map, .num_dapm_routes = ARRAY_SIZE(kabylake_5663_map), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index dd38fdaf2f..673eaa8917 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -779,6 +779,7 @@ static struct snd_soc_card kabylake_audio_card = { .codec_conf = max98927_codec_conf, .num_configs = ARRAY_SIZE(max98927_codec_conf), .fully_routed = true, + .disable_route_checks = true, .late_probe = kabylake_card_late_probe, }; diff --git a/sound/soc/intel/boards/skl_hda_dsp_generic.c b/sound/soc/intel/boards/skl_hda_dsp_generic.c index f4b4eeca3e..6aad5232ac 100644 --- a/sound/soc/intel/boards/skl_hda_dsp_generic.c +++ b/sound/soc/intel/boards/skl_hda_dsp_generic.c @@ -229,6 +229,8 @@ static int skl_hda_audio_probe(struct platform_device *pdev) ctx->common_hdmi_codec_drv = mach->mach_params.common_hdmi_codec_drv; hda_soc_card.dev = &pdev->dev; + if (!snd_soc_acpi_sof_parent(&pdev->dev)) + hda_soc_card.disable_route_checks = true; if (mach->mach_params.dmic_num > 0) { snprintf(hda_soc_components, sizeof(hda_soc_components), diff --git a/sound/soc/intel/boards/skl_nau88l25_max98357a.c b/sound/soc/intel/boards/skl_nau88l25_max98357a.c index e3a1f04a8b..3b62e15da9 100644 --- a/sound/soc/intel/boards/skl_nau88l25_max98357a.c +++ b/sound/soc/intel/boards/skl_nau88l25_max98357a.c @@ -643,6 +643,7 @@ static struct 
snd_soc_card skylake_audio_card = { .dapm_routes = skylake_map, .num_dapm_routes = ARRAY_SIZE(skylake_map), .fully_routed = true, + .disable_route_checks = true, .late_probe = skylake_card_late_probe, }; diff --git a/sound/soc/intel/boards/skl_rt286.c b/sound/soc/intel/boards/skl_rt286.c index 75dab54053..9c2ba695c1 100644 --- a/sound/soc/intel/boards/skl_rt286.c +++ b/sound/soc/intel/boards/skl_rt286.c @@ -524,6 +524,7 @@ static struct snd_soc_card skylake_rt286 = { .dapm_routes = skylake_rt286_map, .num_dapm_routes = ARRAY_SIZE(skylake_rt286_map), .fully_routed = true, + .disable_route_checks = true, .late_probe = skylake_card_late_probe, }; diff --git a/sound/soc/kirkwood/kirkwood-dma.c b/sound/soc/kirkwood/kirkwood-dma.c index 640cebd298..16d2c9acc3 100644 --- a/sound/soc/kirkwood/kirkwood-dma.c +++ b/sound/soc/kirkwood/kirkwood-dma.c @@ -182,6 +182,9 @@ static int kirkwood_dma_hw_params(struct snd_soc_component *component, const struct mbus_dram_target_info *dram = mv_mbus_dram_info(); unsigned long addr = substream->runtime->dma_addr; + if (!dram) + return 0; + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) kirkwood_dma_conf_mbus_windows(priv->io, KIRKWOOD_PLAYBACK_WIN, addr, dram); diff --git a/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c b/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c index f3bebed242..360259e60d 100644 --- a/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c +++ b/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c @@ -566,10 +566,10 @@ static int mtk_dai_tdm_hw_params(struct snd_pcm_substream *substream, tdm_con |= 1 << DELAY_DATA_SFT; tdm_con |= get_tdm_lrck_width(format) << LRCK_TDM_WIDTH_SFT; } else if (tdm_priv->tdm_out_mode == TDM_OUT_DSP_A) { - tdm_con |= 0 << DELAY_DATA_SFT; + tdm_con |= 1 << DELAY_DATA_SFT; tdm_con |= 0 << LRCK_TDM_WIDTH_SFT; } else if (tdm_priv->tdm_out_mode == TDM_OUT_DSP_B) { - tdm_con |= 1 << DELAY_DATA_SFT; + tdm_con |= 0 << DELAY_DATA_SFT; tdm_con |= 0 << LRCK_TDM_WIDTH_SFT; } diff --git a/sound/soc/meson/Kconfig 
b/sound/soc/meson/Kconfig index b93ea33739..6458d5dc49 100644 --- a/sound/soc/meson/Kconfig +++ b/sound/soc/meson/Kconfig @@ -99,6 +99,7 @@ config SND_MESON_AXG_PDM config SND_MESON_CARD_UTILS tristate + select SND_DYNAMIC_MINORS config SND_MESON_CODEC_GLUE tristate diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c index 2b77010c2c..cbbaa55d92 100644 --- a/sound/soc/meson/axg-card.c +++ b/sound/soc/meson/axg-card.c @@ -320,6 +320,7 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np, dai_link->cpus = cpu; dai_link->num_cpus = 1; + dai_link->nonatomic = true; ret = meson_card_parse_dai(card, np, &dai_link->cpus->of_node, &dai_link->cpus->dai_name); diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index bccfb770b3..94b169a549 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -3,6 +3,7 @@ // Copyright (c) 2018 BayLibre, SAS. // Author: Jerome Brunet +#include #include #include #include @@ -145,8 +146,8 @@ int axg_fifo_pcm_hw_params(struct snd_soc_component *component, /* Enable irq if necessary */ irq_en = runtime->no_period_wakeup ? 
0 : FIFO_INT_COUNT_REPEAT; regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_INT_EN(FIFO_INT_COUNT_REPEAT), - CTRL0_INT_EN(irq_en)); + CTRL0_INT_EN, + FIELD_PREP(CTRL0_INT_EN, irq_en)); return 0; } @@ -176,9 +177,9 @@ int axg_fifo_pcm_hw_free(struct snd_soc_component *component, { struct axg_fifo *fifo = axg_fifo_data(ss); - /* Disable the block count irq */ + /* Disable irqs */ regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_INT_EN(FIFO_INT_COUNT_REPEAT), 0); + CTRL0_INT_EN, 0); return 0; } @@ -187,13 +188,13 @@ EXPORT_SYMBOL_GPL(axg_fifo_pcm_hw_free); static void axg_fifo_ack_irq(struct axg_fifo *fifo, u8 mask) { regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_INT_CLR(FIFO_INT_MASK), - CTRL1_INT_CLR(mask)); + CTRL1_INT_CLR, + FIELD_PREP(CTRL1_INT_CLR, mask)); /* Clear must also be cleared */ regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_INT_CLR(FIFO_INT_MASK), - 0); + CTRL1_INT_CLR, + FIELD_PREP(CTRL1_INT_CLR, 0)); } static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) @@ -203,18 +204,26 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) unsigned int status; regmap_read(fifo->map, FIFO_STATUS1, &status); + status = FIELD_GET(STATUS1_INT_STS, status); + axg_fifo_ack_irq(fifo, status); - status = STATUS1_INT_STS(status) & FIFO_INT_MASK; + /* Use the thread to call period elapsed on nonatomic links */ if (status & FIFO_INT_COUNT_REPEAT) - snd_pcm_period_elapsed(ss); - else - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + return IRQ_WAKE_THREAD; - /* Ack irqs */ - axg_fifo_ack_irq(fifo, status); + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); + + return IRQ_NONE; +} + +static irqreturn_t axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) +{ + struct snd_pcm_substream *ss = dev_id; + + snd_pcm_period_elapsed(ss); - return IRQ_RETVAL(status); + return IRQ_HANDLED; } int axg_fifo_pcm_open(struct snd_soc_component *component, @@ -242,8 +251,9 @@ int axg_fifo_pcm_open(struct 
snd_soc_component *component, if (ret) return ret; - ret = request_irq(fifo->irq, axg_fifo_pcm_irq_block, 0, - dev_name(dev), ss); + ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, + axg_fifo_pcm_irq_block_thread, + IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; @@ -254,15 +264,15 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, /* Setup status2 so it reports the memory pointer */ regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_STATUS2_SEL_MASK, - CTRL1_STATUS2_SEL(STATUS2_SEL_DDR_READ)); + CTRL1_STATUS2_SEL, + FIELD_PREP(CTRL1_STATUS2_SEL, STATUS2_SEL_DDR_READ)); /* Make sure the dma is initially disabled */ __dma_enable(fifo, false); /* Disable irqs until params are ready */ regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_INT_EN(FIFO_INT_MASK), 0); + CTRL0_INT_EN, 0); /* Clear any pending interrupt */ axg_fifo_ack_irq(fifo, FIFO_INT_MASK); diff --git a/sound/soc/meson/axg-fifo.h b/sound/soc/meson/axg-fifo.h index b63acd723c..5b7d32c379 100644 --- a/sound/soc/meson/axg-fifo.h +++ b/sound/soc/meson/axg-fifo.h @@ -42,21 +42,19 @@ struct snd_soc_pcm_runtime; #define FIFO_CTRL0 0x00 #define CTRL0_DMA_EN BIT(31) -#define CTRL0_INT_EN(x) ((x) << 16) +#define CTRL0_INT_EN GENMASK(23, 16) #define CTRL0_SEL_MASK GENMASK(2, 0) #define CTRL0_SEL_SHIFT 0 #define FIFO_CTRL1 0x04 -#define CTRL1_INT_CLR(x) ((x) << 0) -#define CTRL1_STATUS2_SEL_MASK GENMASK(11, 8) -#define CTRL1_STATUS2_SEL(x) ((x) << 8) +#define CTRL1_INT_CLR GENMASK(7, 0) +#define CTRL1_STATUS2_SEL GENMASK(11, 8) #define STATUS2_SEL_DDR_READ 0 -#define CTRL1_FRDDR_DEPTH_MASK GENMASK(31, 24) -#define CTRL1_FRDDR_DEPTH(x) ((x) << 24) +#define CTRL1_FRDDR_DEPTH GENMASK(31, 24) #define FIFO_START_ADDR 0x08 #define FIFO_FINISH_ADDR 0x0c #define FIFO_INT_ADDR 0x10 #define FIFO_STATUS1 0x14 -#define STATUS1_INT_STS(x) ((x) << 0) +#define STATUS1_INT_STS GENMASK(7, 0) #define FIFO_STATUS2 0x18 #define FIFO_INIT_ADDR 0x24 #define FIFO_CTRL2 0x28 diff --git 
a/sound/soc/meson/axg-frddr.c b/sound/soc/meson/axg-frddr.c index 37f4bb3469..38c731ad40 100644 --- a/sound/soc/meson/axg-frddr.c +++ b/sound/soc/meson/axg-frddr.c @@ -7,6 +7,7 @@ * This driver implements the frontend playback DAI of AXG and G12A based SoCs */ +#include #include #include #include @@ -59,8 +60,8 @@ static int axg_frddr_dai_hw_params(struct snd_pcm_substream *substream, /* Trim the FIFO depth if the period is small to improve latency */ depth = min(period, fifo->depth); val = (depth / AXG_FIFO_BURST) - 1; - regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH_MASK, - CTRL1_FRDDR_DEPTH(val)); + regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH, + FIELD_PREP(CTRL1_FRDDR_DEPTH, val)); return 0; } diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index 60d132ab1a..f514590236 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -362,13 +362,29 @@ static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, return 0; } -static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream, +static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, + int cmd, struct snd_soc_dai *dai) { - struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); + struct axg_tdm_stream *ts = + snd_soc_dai_get_dma_data(dai, substream); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + axg_tdm_stream_start(ts); + break; + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_STOP: + axg_tdm_stream_stop(ts); + break; + default: + return -EINVAL; + } - /* Force all attached formatters to update */ - return axg_tdm_stream_reset(ts); + return 0; } static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai) @@ -408,8 +424,8 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = 
axg_tdm_iface_startup, .hw_params = axg_tdm_iface_hw_params, - .prepare = axg_tdm_iface_prepare, .hw_free = axg_tdm_iface_hw_free, + .trigger = axg_tdm_iface_trigger, }; /* TDM Backend DAIs */ diff --git a/sound/soc/meson/axg-toddr.c b/sound/soc/meson/axg-toddr.c index d6adf7edea..85a17d8861 100644 --- a/sound/soc/meson/axg-toddr.c +++ b/sound/soc/meson/axg-toddr.c @@ -5,6 +5,7 @@ /* This driver implements the frontend capture DAI of AXG based SoCs */ +#include #include #include #include @@ -19,12 +20,9 @@ #define CTRL0_TODDR_EXT_SIGNED BIT(29) #define CTRL0_TODDR_PP_MODE BIT(28) #define CTRL0_TODDR_SYNC_CH BIT(27) -#define CTRL0_TODDR_TYPE_MASK GENMASK(15, 13) -#define CTRL0_TODDR_TYPE(x) ((x) << 13) -#define CTRL0_TODDR_MSB_POS_MASK GENMASK(12, 8) -#define CTRL0_TODDR_MSB_POS(x) ((x) << 8) -#define CTRL0_TODDR_LSB_POS_MASK GENMASK(7, 3) -#define CTRL0_TODDR_LSB_POS(x) ((x) << 3) +#define CTRL0_TODDR_TYPE GENMASK(15, 13) +#define CTRL0_TODDR_MSB_POS GENMASK(12, 8) +#define CTRL0_TODDR_LSB_POS GENMASK(7, 3) #define CTRL1_TODDR_FORCE_FINISH BIT(25) #define CTRL1_SEL_SHIFT 28 @@ -76,12 +74,12 @@ static int axg_toddr_dai_hw_params(struct snd_pcm_substream *substream, width = params_width(params); regmap_update_bits(fifo->map, FIFO_CTRL0, - CTRL0_TODDR_TYPE_MASK | - CTRL0_TODDR_MSB_POS_MASK | - CTRL0_TODDR_LSB_POS_MASK, - CTRL0_TODDR_TYPE(type) | - CTRL0_TODDR_MSB_POS(TODDR_MSB_POS) | - CTRL0_TODDR_LSB_POS(TODDR_MSB_POS - (width - 1))); + CTRL0_TODDR_TYPE | + CTRL0_TODDR_MSB_POS | + CTRL0_TODDR_LSB_POS, + FIELD_PREP(CTRL0_TODDR_TYPE, type) | + FIELD_PREP(CTRL0_TODDR_MSB_POS, TODDR_MSB_POS) | + FIELD_PREP(CTRL0_TODDR_LSB_POS, TODDR_MSB_POS - (width - 1))); return 0; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index a5b3ee69fb..1c4d8b96f7 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1042,6 +1042,9 @@ int snd_soc_add_pcm_runtime(struct snd_soc_card *card, if (!snd_soc_is_matching_component(platform, component)) continue; + if 
(snd_soc_component_is_dummy(component) && component->num_dai) + continue; + snd_soc_rtd_add_component(rtd, component); } } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index c56379fac9..57caa91a43 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -246,7 +246,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, int max = mc->max; int min = mc->min; int sign_bit = mc->sign_bit; - unsigned int mask = (1 << fls(max)) - 1; + unsigned int mask = (1ULL << fls(max)) - 1; unsigned int invert = mc->invert; int val; int ret; diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c index a74c980ee7..d5a74e2537 100644 --- a/sound/soc/tegra/tegra186_dspk.c +++ b/sound/soc/tegra/tegra186_dspk.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // tegra186_dspk.c - Tegra186 DSPK driver -// -// Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. 
#include #include @@ -241,14 +240,14 @@ static int tegra186_dspk_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - cif_conf.client_bits = TEGRA_ACIF_BITS_24; - switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_16; + cif_conf.client_bits = TEGRA_ACIF_BITS_16; break; case SNDRV_PCM_FORMAT_S32_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_32; + cif_conf.client_bits = TEGRA_ACIF_BITS_24; break; default: dev_err(dev, "unsupported format!\n"); diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index 56a19eeec5..5b82329f44 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -2423,12 +2423,6 @@ static int davinci_mcasp_probe(struct platform_device *pdev) mcasp_reparent_fck(pdev); - ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, - &davinci_mcasp_dai[mcasp->op_mode], 1); - - if (ret != 0) - goto err; - ret = davinci_mcasp_get_dma_type(mcasp); switch (ret) { case PCM_EDMA: @@ -2455,6 +2449,12 @@ static int davinci_mcasp_probe(struct platform_device *pdev) goto err; } + ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, + &davinci_mcasp_dai[mcasp->op_mode], 1); + + if (ret != 0) + goto err; + no_audio: ret = davinci_mcasp_init_gpiochip(mcasp); if (ret) { diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index b67617b68e..f4437015d4 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -202,7 +202,7 @@ int line6_send_raw_message_async(struct usb_line6 *line6, const char *buffer, struct urb *urb; /* create message: */ - msg = kmalloc(sizeof(struct message), GFP_ATOMIC); + msg = kzalloc(sizeof(struct message), GFP_ATOMIC); if (msg == NULL) return -ENOMEM; @@ -688,7 +688,7 @@ static int line6_init_cap_control(struct usb_line6 *line6) int ret; /* initialize USB buffers: */ - line6->buffer_listen = kmalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL); + line6->buffer_listen = 
kzalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL); if (!line6->buffer_listen) return -ENOMEM; @@ -697,7 +697,7 @@ static int line6_init_cap_control(struct usb_line6 *line6) return -ENOMEM; if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) { - line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL); + line6->buffer_message = kzalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL); if (!line6->buffer_message) return -ENOMEM; diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index ec31f5b603..1c25c1072a 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -148,7 +148,7 @@ AVXcode: 65: SEG=GS (Prefix) 66: Operand-Size (Prefix) 67: Address-Size (Prefix) -68: PUSH Iz (d64) +68: PUSH Iz 69: IMUL Gv,Ev,Iz 6a: PUSH Ib (d64) 6b: IMUL Gv,Ev,Ib diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 45e0d64061..55ca620b56 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -643,7 +643,7 @@ static int sets_patch(struct object *obj) static int symbols_patch(struct object *obj) { - int err; + off_t err; if (__symbols_patch(obj, &obj->structs) || __symbols_patch(obj, &obj->unions) || diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c index 6a00a6eeca..c5c5082cb2 100644 --- a/tools/iio/iio_utils.c +++ b/tools/iio/iio_utils.c @@ -376,7 +376,7 @@ int build_channel_array(const char *device_dir, int buffer_idx, goto error_close_dir; } - seekdir(dp, 0); + rewinddir(dp); while (ent = readdir(dp), ent) { if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), "_en") == 0) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 69d7f0d65b..54b8c899d2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6089,7 +6089,7 @@ struct bpf_fib_lookup { /* output: MTU value */ __u16 mtu_result; - }; + } __attribute__((packed, aligned(2))); /* input: L3 device index for 
lookup * output: device index from FIB lookup */ diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 5146ff0fa0..6aa1c35273 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -224,10 +224,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist) static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); + struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread); sid->id = id; sid->evsel = evsel; @@ -245,21 +245,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist) void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + if (!SID(evsel, cpu_map_idx, thread)) + return; + + perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id); evsel->id[evsel->ids++] = id; } int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) + int cpu_map_idx, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ u64 id; int ret; + if (!SID(evsel, cpu_map_idx, thread)) + return -1; + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); if (!ret) goto add; @@ -288,7 +294,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, id = read_data[id_idx]; add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); + perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id); return 0; } diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index f366dbad6a..49b17b2b39 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -119,11 +119,11 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist); void perf_evlist__id_add(struct 
perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id); + int cpu_map_idx, int thread, u64 id); int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd); + int cpu_map_idx, int thread, int fd); void perf_evlist__reset_id_hash(struct perf_evlist *evlist); diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 39ebf61920..e799d35cba 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -633,11 +633,10 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o const char *const subcommands[], const char *usagestr[], int flags) { struct parse_opt_ctx_t ctx; + char *buf = NULL; /* build usage string if it's not provided */ if (subcommands && !usagestr[0]) { - char *buf = NULL; - astrcatf(&buf, "%s %s [] {", subcmd_config.exec_name, argv[0]); for (int i = 0; subcommands[i]; i++) { @@ -679,7 +678,10 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o astrcatf(&error_buf, "unknown switch `%c'", *ctx.opt); usage_with_options(usagestr, options); } - + if (buf) { + usagestr[0] = NULL; + free(buf); + } return parse_options_end(&ctx); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c3bb96e5bf..fd6714de22 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -169,8 +169,9 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "panic", "do_exit", "do_task_dead", + "kthread_exit", "make_task_dead", - "__module_put_and_exit", + "__module_put_and_kthread_exit", "complete_and_exit", "__reiserfs_panic", "lbug_with_loc", diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index b3d4bf08e7..f382cd53cb 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -322,7 +322,7 @@ below the processor's base frequency. 
Busy% = MPERF_delta/TSC_delta -Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval +Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval Note that these calculations depend on TSC_delta, so they are not reliable during intervals when TSC_MHz is not running at the base frequency. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 65ada8065c..0822e7dc0f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1761,9 +1761,10 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) - continue; - average.packages.counter[i] += p->counter[i]; + if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) + average.packages.counter[i] = p->counter[i]; + else + average.packages.counter[i] += p->counter[i]; } return 0; } diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 5fd9e59407..ebda9c366b 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -1241,6 +1241,7 @@ unsigned int get_pkg_num(int cpu) retval = fscanf(fp, "%d\n", &pkg); if (retval != 1) errx(1, "%s: failed to parse", pathname); + fclose(fp); return pkg; } diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index e6c381498e..449e45bd69 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -836,6 +836,7 @@ sub set_value { if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ && $prvalue !~ /^(config_|)bisect$/ && $prvalue !~ /^build$/ && + $prvalue !~ /^make_warnings_file$/ && $buildonly) { # Note if a test is something other than build, then we diff --git 
a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index eefd445b96..7465cbe19b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -2014,9 +2014,9 @@ int main(int argc, char **argv) free(options.whitelist); if (options.blacklist) free(options.blacklist); + close(cg_fd); if (cg_created) cleanup_cgroup_environment(); - close(cg_fd); return err; } diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile index 8af25ae960..24d8910c7a 100644 --- a/tools/testing/selftests/filesystems/binderfs/Makefile +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -3,6 +3,4 @@ CFLAGS += -I../../../../../usr/include/ -pthread TEST_GEN_PROGS := binderfs_test -binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h - include ../../lib.mk diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index b1ede62498..b7c8f29c09 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -18,7 +18,7 @@ echo 'sched:*' > set_event yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -29,7 +29,7 @@ echo 1 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -40,7 +40,7 @@ echo 0 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ 
print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then fail "any of scheduler events should not be recorded" fi diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c index 6ea7b9f37a..d7a8e321bb 100644 --- a/tools/testing/selftests/kcmp/kcmp_test.c +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -88,7 +88,10 @@ int main(int argc, char **argv) int pid2 = getpid(); int ret; - fd2 = open(kpath, O_RDWR, 0644); + ksft_print_header(); + ksft_set_plan(3); + + fd2 = open(kpath, O_RDWR); if (fd2 < 0) { perror("Can't open file"); ksft_exit_fail(); @@ -152,7 +155,6 @@ int main(int argc, char **argv) ksft_inc_pass_cnt(); } - ksft_print_cnts(); if (ret) ksft_exit_fail(); @@ -162,5 +164,5 @@ int main(int argc, char **argv) waitpid(pid2, &status, P_ALL); - return ksft_exit_pass(); + return 0; } diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/setting new file mode 100644 index 0000000000..a953c96aa1 --- /dev/null +++ b/tools/testing/selftests/mqueue/setting @@ -0,0 +1 @@ +timeout=180 diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 1162836f8f..6dc3cb4ac6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -481,10 +481,10 @@ v3exc_timeout_test() RET=0 local X=("192.0.2.20" "192.0.2.30") - # GMI should be 3 seconds + # GMI should be 5 seconds ip link set dev br0 type bridge mcast_query_interval 100 \ mcast_query_response_interval 100 \ - mcast_membership_interval 300 + mcast_membership_interval 500 v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP ip link set dev br0 type bridge mcast_query_interval 500 \ @@ -492,7 +492,7 @@ v3exc_timeout_test() mcast_membership_interval 1500 $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q - sleep 3 + sleep 5 bridge -j -d -s mdb show dev br0 \ | jq -e ".[].mdb[] | \ select(.grp == 
\"$TEST_GROUP\" and \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index e2b9ff773c..f84ab2e657 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -478,10 +478,10 @@ mldv2exc_timeout_test() RET=0 local X=("2001:db8:1::20" "2001:db8:1::30") - # GMI should be 3 seconds + # GMI should be 5 seconds ip link set dev br0 type bridge mcast_query_interval 100 \ mcast_query_response_interval 100 \ - mcast_membership_interval 300 + mcast_membership_interval 500 mldv2exclude_prepare $h1 ip link set dev br0 type bridge mcast_query_interval 500 \ @@ -489,7 +489,7 @@ mldv2exc_timeout_test() mcast_membership_interval 1500 $MZ $h1 -c 1 $MZPKT_ALLOW2 -q - sleep 3 + sleep 5 bridge -j -d -s mdb show dev br0 \ | jq -e ".[].mdb[] | \ select(.grp == \"$TEST_GROUP\" and \ diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 57a681107f..a8178a9c1e 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -53,7 +53,7 @@ __chk_nr() printf "%-50s" "$msg" if [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} else echo "[ ok ]" fi @@ -88,10 +88,10 @@ wait_msk_nr() printf "%-50s" "$msg" if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" - ret=$test_cnt + ret=${KSFT_FAIL} elif [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + ret=${KSFT_FAIL} else echo "[ ok ]" fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 8efff3f9c5..5a1277d172 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -451,6 +451,7 @@ do_transfer() local stat_cookierx_last=$(get_mib_counter 
"${listener_ns}" "TcpExtSyncookiesRecv") local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_tcpfb_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -516,6 +517,7 @@ do_transfer() local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_tcpfb_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -560,6 +562,11 @@ do_transfer() fi fi + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + printf "[ FAIL ]\nunexpected fallback to TCP" + rets=1 + fi + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then printf "[ OK ]" fi diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c index 7c5b12664b..bfb07dc495 100644 --- a/tools/testing/selftests/net/reuseaddr_conflict.c +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -109,6 +109,6 @@ int main(void) fd1 = open_port(0, 1); if (fd1 >= 0) error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); - fprintf(stderr, "Success"); + fprintf(stderr, "Success\n"); return 0; } diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index fa5aa588e5..ade308fb1a 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -238,7 +238,7 @@ for family in 4 6; do create_vxlan_pair ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on - run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + run_test "GRO frag list 
over UDP tunnel" $OL_NET$DST 10 10 cleanup # use NAT to circumvent GRO FWD check @@ -251,13 +251,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null - run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST - cleanup - - create_vxlan_pair - run_bench "UDP tunnel fwd perf" $OL_NET$DST - ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on - run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup done diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 2deac2031d..021863f860 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -5,6 +5,8 @@ CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := resctrl_tests +LOCAL_HDRS += $(wildcard *.h) + include ../lib.mk -$(OUTPUT)/resctrl_tests: $(wildcard *.[ch]) +$(OUTPUT)/resctrl_tests: $(wildcard *.c) diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index b5d592d409..d975a67673 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -158,6 +158,20 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) /* In preparation for sigreturn. */ SYSCALL_DISPATCH_OFF(glob_sel); + + /* + * The tests for argument handling assume that `syscall(x) == x`. This + * is a NOP on x86 because the syscall number is passed in %rax, which + * happens to also be the function ABI return register. Other + * architectures may need to swizzle the arguments around. 
+ */ +#if defined(__riscv) +/* REG_A7 is not defined in libc headers */ +# define REG_A7 (REG_A0 + 7) + + ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A0] = + ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A7]; +#endif } TEST(dispatch_and_return) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 0ba500056e..193a984f51 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end) diff = end.tv_usec - start.tv_usec; diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; - if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index 48b9a80323..d13ebde203 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -21,9 +21,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
*/ - - - #include #include #include @@ -62,45 +59,47 @@ int clear_time_state(void) #define NUM_FREQ_OUTOFRANGE 4 #define NUM_FREQ_INVALID 2 +#define SHIFTED_PPM (1 << 16) + long valid_freq[NUM_FREQ_VALID] = { - -499<<16, - -450<<16, - -400<<16, - -350<<16, - -300<<16, - -250<<16, - -200<<16, - -150<<16, - -100<<16, - -75<<16, - -50<<16, - -25<<16, - -10<<16, - -5<<16, - -1<<16, + -499 * SHIFTED_PPM, + -450 * SHIFTED_PPM, + -400 * SHIFTED_PPM, + -350 * SHIFTED_PPM, + -300 * SHIFTED_PPM, + -250 * SHIFTED_PPM, + -200 * SHIFTED_PPM, + -150 * SHIFTED_PPM, + -100 * SHIFTED_PPM, + -75 * SHIFTED_PPM, + -50 * SHIFTED_PPM, + -25 * SHIFTED_PPM, + -10 * SHIFTED_PPM, + -5 * SHIFTED_PPM, + -1 * SHIFTED_PPM, -1000, - 1<<16, - 5<<16, - 10<<16, - 25<<16, - 50<<16, - 75<<16, - 100<<16, - 150<<16, - 200<<16, - 250<<16, - 300<<16, - 350<<16, - 400<<16, - 450<<16, - 499<<16, + 1 * SHIFTED_PPM, + 5 * SHIFTED_PPM, + 10 * SHIFTED_PPM, + 25 * SHIFTED_PPM, + 50 * SHIFTED_PPM, + 75 * SHIFTED_PPM, + 100 * SHIFTED_PPM, + 150 * SHIFTED_PPM, + 200 * SHIFTED_PPM, + 250 * SHIFTED_PPM, + 300 * SHIFTED_PPM, + 350 * SHIFTED_PPM, + 400 * SHIFTED_PPM, + 450 * SHIFTED_PPM, + 499 * SHIFTED_PPM, }; long outofrange_freq[NUM_FREQ_OUTOFRANGE] = { - -1000<<16, - -550<<16, - 550<<16, - 1000<<16, + -1000 * SHIFTED_PPM, + -550 * SHIFTED_PPM, + 550 * SHIFTED_PPM, + 1000 * SHIFTED_PPM, }; #define LONG_MAX (~0UL>>1) diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index c65c55b7a7..312889edb8 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -15,7 +15,6 @@ #include #include #include -#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -71,16 +70,10 @@ int main(int argc, char **argv) { void *addr; int ret; - size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; - hugepage_size = default_huge_page_size(); - /* munmap with fail if the length is 
not page aligned */ - if (hugepage_size > length) - length = hugepage_size; - if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index 59a7f2346e..f7ed8084e1 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -935,12 +935,12 @@ static void show_available(void) } if (!tracers) { - warnx(no_tracer_msg); + warnx("%s", no_tracer_msg); return; } if (!found) { - warnx(no_latency_tr_msg); + warnx("%s", no_latency_tr_msg); tracefs_list_free(tracers); return; } @@ -983,7 +983,7 @@ static const char *find_default_tracer(void) for (i = 0; relevant_tracers[i]; i++) { valid = tracer_valid(relevant_tracers[i], ¬racer); if (notracer) - errx(EXIT_FAILURE, no_tracer_msg); + errx(EXIT_FAILURE, "%s", no_tracer_msg); if (valid) return relevant_tracers[i]; } @@ -1878,7 +1878,7 @@ static void scan_arguments(int argc, char *argv[]) } valid = tracer_valid(current_tracer, ¬racer); if (notracer) - errx(EXIT_FAILURE, no_tracer_msg); + errx(EXIT_FAILURE, "%s", no_tracer_msg); if (!valid) errx(EXIT_FAILURE, "The tracer %s is not supported by your kernel!\n", current_tracer); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index dd777688d1..952afb1bc8 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -88,7 +88,27 @@ static void async_pf_execute(struct work_struct *work) rcuwait_wake_up(&vcpu->wait); mmput(mm); - kvm_put_kvm(vcpu->kvm); +} + +static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work) +{ + /* + * The async #PF is "done", but KVM must wait for the work item itself, + * i.e. async_pf_execute(), to run to completion. If KVM is a module, + * KVM must ensure *no* code owned by the KVM (the module) can be run + * after the last call to module_put(). Note, flushing the work item + * is always required when the item is taken off the completion queue. + * E.g. 
even if the vCPU handles the item in the "normal" path, the VM + * could be terminated before async_pf_execute() completes. + * + * Wake all events skip the queue and go straight done, i.e. don't + * need to be flushed (but sanity check that the work wasn't queued). + */ + if (work->wakeup_all) + WARN_ON_ONCE(work->work.func); + else + flush_work(&work->work); + kmem_cache_free(async_pf_cache, work); } void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) @@ -115,7 +135,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) #else if (cancel_work_sync(&work->work)) { mmput(work->mm); - kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } #endif @@ -127,7 +146,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_first_entry(&vcpu->async_pf.done, typeof(*work), link); list_del(&work->link); - kmem_cache_free(async_pf_cache, work); + + spin_unlock(&vcpu->async_pf.lock); + kvm_flush_and_free_async_pf_work(work); + spin_lock(&vcpu->async_pf.lock); } spin_unlock(&vcpu->async_pf.lock); @@ -152,7 +174,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->queue); vcpu->async_pf.queued--; - kmem_cache_free(async_pf_cache, work); + kvm_flush_and_free_async_pf_work(work); } } @@ -187,7 +209,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, work->arch = *arch; work->mm = current->mm; mmget(work->mm); - kvm_get_kvm(work->vcpu->kvm); INIT_WORK(&work->work, async_pf_execute);