diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index f85ce9e327b98b94ab1f119f9ec15edf6a13ccfa..c1075ecfdb4b53dfdb17a75679114c1f9d94bf87 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -1,3 +1,459 @@ +===== General Properties ===== + +What: /sys/class/power_supply/<supply_name>/manufacturer +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the name of the device manufacturer. + + Access: Read + Valid values: Represented as string + +What: /sys/class/power_supply/<supply_name>/model_name +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the name of the device model. + + Access: Read + Valid values: Represented as string + +What: /sys/class/power_supply/<supply_name>/serial_number +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the serial number of the device. + + Access: Read + Valid values: Represented as string + +What: /sys/class/power_supply/<supply_name>/type +Date: May 2010 +Contact: linux-pm@vger.kernel.org +Description: + Describes the main type of the supply. + + Access: Read + Valid values: "Battery", "UPS", "Mains", "USB" + +===== Battery Properties ===== + +What: /sys/class/power_supply/<supply_name>/capacity +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Fine-grained representation of battery capacity. + + Access: Read + Valid values: 0 - 100 (percent) + +What: /sys/class/power_supply/<supply_name>/capacity_alert_max +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Maximum battery capacity trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery discharging scenario where user-space needs to know the + battery has dropped below an upper threshold so it can take + appropriate action (e.g. warning the user that the battery level is + low). + + Access: Read, Write + Valid values: 0 - 100 (percent) + +What: /sys/class/power_supply/<supply_name>/capacity_alert_min +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Minimum battery capacity trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery discharging scenario where user-space needs to know the + battery has dropped below a lower threshold so it can take + appropriate action (e.g. warning the user that the battery level is + critically low). + + Access: Read, Write + Valid values: 0 - 100 (percent) + +What: /sys/class/power_supply/<supply_name>/capacity_level +Date: June 2009 +Contact: linux-pm@vger.kernel.org +Description: + Coarse representation of battery capacity. + + Access: Read + Valid values: "Unknown", "Critical", "Low", "Normal", "High", + "Full" + +What: /sys/class/power_supply/<supply_name>/current_avg +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an average IBAT current reading for the battery, over a + fixed period. Normally devices will provide a fixed interval in + which they average readings to smooth out the reported value. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/current_max +Date: October 2010 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum IBAT current allowed into the battery. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/current_now +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an instant, single IBAT current reading for the battery. + This value is not averaged/smoothed.
+ + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/charge_type +Date: July 2009 +Contact: linux-pm@vger.kernel.org +Description: + Represents the type of charging currently being applied to the + battery. + + Access: Read + Valid values: "Unknown", "N/A", "Trickle", "Fast" + +What: /sys/class/power_supply/<supply_name>/charge_term_current +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the charging current value which is used to determine + when the battery is considered full and charging should end. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/health +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the health of the battery or battery side of charger + functionality. + + Access: Read + Valid values: "Unknown", "Good", "Overheat", "Dead", + "Over voltage", "Unspecified failure", "Cold", + "Watchdog timer expire", "Safety timer expire", + "Over current", "Warm", "Cool", "Hot" + +What: /sys/class/power_supply/<supply_name>/precharge_current +Date: June 2017 +Contact: linux-pm@vger.kernel.org +Description: + Reports the charging current applied during the pre-charging phase + of a battery charge cycle. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/present +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports whether a battery is present in the system. + + Access: Read + Valid values: + 0: Absent + 1: Present + +What: /sys/class/power_supply/<supply_name>/status +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Represents the charging status of the battery. Normally this + is read-only reporting, although for some supplies this can be + used to enable/disable charging to the battery. + + Access: Read, Write + Valid values: "Unknown", "Charging", "Discharging", + "Not charging", "Full" + +What: /sys/class/power_supply/<supply_name>/technology +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Describes the battery technology supported by the supply. + + Access: Read + Valid values: "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", + "NiCd", "LiMn" + +What: /sys/class/power_supply/<supply_name>/temp +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the current TBAT battery temperature reading. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_alert_max +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Maximum TBAT temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery charging scenario where user-space needs to know the + battery temperature has crossed an upper threshold so it can + take appropriate action (e.g. warning the user that the battery + temperature is critically high, and charging has stopped). + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_alert_min +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Minimum TBAT temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + battery charging scenario where user-space needs to know the + battery temperature has crossed a lower threshold so it can take + appropriate action (e.g. warning the user that the battery + temperature is high, and charging current has been reduced + accordingly to remedy the situation).
+ + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_max +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum allowed TBAT battery temperature for + charging. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_min +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum allowed TBAT battery temperature for + charging. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/voltage_avg +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an average VBAT voltage reading for the battery, over a + fixed period. Normally devices will provide a fixed interval in + which they average readings to smooth out the reported value. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply/<supply_name>/voltage_max +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum safe VBAT voltage permitted for the battery + during charging. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply/<supply_name>/voltage_min +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum safe VBAT voltage permitted for the battery + during discharging. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply/<supply_name>/voltage_now +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an instant, single VBAT voltage reading for the battery. + This value is not averaged/smoothed. + + Access: Read + Valid values: Represented in microvolts + +===== USB Properties ===== + +What: /sys/class/power_supply/<supply_name>/current_avg +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports an average IBUS current reading over a fixed period. + Normally devices will provide a fixed interval in which they + average readings to smooth out the reported value. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/current_max +Date: October 2010 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum IBUS current the supply can support. + + Access: Read + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/current_now +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the IBUS current supplied now. This value is generally + read-only reporting, unless the 'online' state of the supply + is set to be programmable, in which case this value can be set + within the reported min/max range. + + Access: Read, Write + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/input_current_limit +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Details the incoming IBUS current limit currently set in the + supply. Normally this is configured based on the type of + connection made (e.g. a configured SDP should output a maximum + of 500mA, so the input current limit is set to the same value). + + Access: Read, Write + Valid values: Represented in microamps + +What: /sys/class/power_supply/<supply_name>/online +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Indicates if VBUS is present for the supply. When the supply is + online, and the supply allows it, then it's possible to switch + between online states (e.g. Fixed -> Programmable for a PD_PPS + USB supply so voltage and current can be controlled).
+ + Access: Read, Write + Valid values: + 0: Offline + 1: Online Fixed - Fixed Voltage Supply + 2: Online Programmable - Programmable Voltage Supply + +What: /sys/class/power_supply/<supply_name>/temp +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the current supply temperature reading. This would + normally be the internal temperature of the device itself (e.g. + the TJUNC temperature of an IC). + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_alert_max +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Maximum supply temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + charging scenario where user-space needs to know the supply + temperature has crossed an upper threshold so it can take + appropriate action (e.g. warning the user that the supply + temperature is critically high, and charging has stopped to + remedy the situation). + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_alert_min +Date: July 2012 +Contact: linux-pm@vger.kernel.org +Description: + Minimum supply temperature trip-wire value where the supply will + notify user-space of the event. This is normally used for the + charging scenario where user-space needs to know the supply + temperature has crossed a lower threshold so it can take + appropriate action (e.g. warning the user that the supply + temperature is high, and charging current has been reduced + accordingly to remedy the situation). + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_max +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum allowed supply temperature for operation. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/temp_min +Date: July 2014 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum allowed supply temperature for operation. + + Access: Read + Valid values: Represented in 1/10 Degrees Celsius + +What: /sys/class/power_supply/<supply_name>/usb_type +Date: March 2018 +Contact: linux-pm@vger.kernel.org +Description: + Reports what type of USB connection is currently active for + the supply; for example, it can show whether a USB-PD capable + source is attached. + + Access: Read-Only + Valid values: "Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD", + "PD_DRP", "PD_PPS", "BrickID" + +What: /sys/class/power_supply/<supply_name>/voltage_max +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the maximum VBUS voltage the supply can support. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply/<supply_name>/voltage_min +Date: January 2008 +Contact: linux-pm@vger.kernel.org +Description: + Reports the minimum VBUS voltage the supply can support. + + Access: Read + Valid values: Represented in microvolts + +What: /sys/class/power_supply/<supply_name>/voltage_now +Date: May 2007 +Contact: linux-pm@vger.kernel.org +Description: + Reports the VBUS voltage supplied now. This value is generally + read-only reporting, unless the 'online' state of the supply + is set to be programmable, in which case this value can be set + within the reported min/max range.
+ + Access: Read, Write + Valid values: Represented in microvolts + +===== Device Specific Properties ===== + What: /sys/class/power/ds2760-battery.*/charge_now Date: May 2010 KernelVersion: 2.6.35 diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index e0a8a2c2c0c1a7ca44c45e851762eb3361a8b3d0..a67387006a0fc1152e75ae09c3dee64dccc73b66 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -318,3 +318,23 @@ Date: September 2019 Contact: "Hridya Valsaraju" Description: Average number of valid blocks. Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs/<disk>/mounted_time_sec +Date: February 2020 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: Show the mounted time of this partition, in seconds. + +What: /sys/fs/f2fs/<disk>/data_io_flag +Date: April 2020 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: Give a way to attach REQ_META|FUA to data writes + given temperature-based bits. Now the bits indicate: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | + +What: /sys/fs/f2fs/<disk>/iostat_period_ms +Date: April 2020 +Contact: "Daeho Jeong" +Description: Give a way to change the iostat_period time (3 seconds by + default). The new iostat trace reports the stats delta over + each period. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3b32e5fe1447e6cbc57a297dccf70c05e6e6439b..d545de46d7def45239ec5ea4d514b5c296a9fb4f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -137,6 +137,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic. + acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index d4a85d535bf99e09cfe5c3dc41652eaa3b64f21b..4a9d9c794ee5d889638d1a3af008dec06d11feaf 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space - management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use - of valid tagged pointers in this context is always allowed. + management (e.g. ``mprotect()``, ``madvise()``). The use of valid + tagged pointers in this context is allowed with the exception of + ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses (see the sketch after this list). + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. 2. User addresses accessed by the kernel (e.g. ``write()``).
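To make the stage 1 rule concrete, here is a minimal userspace sketch (an editorial illustration, not part of the patch; it assumes an arm64 kernel with the v5.6 semantics described in the NOTE above, and the function name is hypothetical)::

	#include <stdint.h>
	#include <sys/mman.h>

	/* Stage 1: a valid tagged pointer is accepted by address-space
	 * management calls such as mprotect(), but must not be used as
	 * the addr argument of mmap()/mremap() or for brk(). */
	int stage1_sketch(void)
	{
		void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return -1;

		/* Insert an arbitrary tag (0x2a) into bits 63:56. */
		void *tagged = (void *)((uintptr_t)p | (0x2aULL << 56));

		/* Allowed: mprotect() accepts the valid tagged pointer. */
		return mprotect(tagged, 4096, PROT_READ);
	}

Passing ``tagged`` as the address hint to ``mmap()`` or as ``new_address`` to ``mremap()`` is exactly what the exception above forbids, since the tag bits could alias another existing user address.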
This ABI relaxation is disabled by default and the application thread needs to diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt index df873d1f3b7c598b6c30721d3eec915a20ea8621..2aaae210317bbbe73b83b2413a371856a6da308c 100644 --- a/Documentation/devicetree/bindings/net/fsl-fman.txt +++ b/Documentation/devicetree/bindings/net/fsl-fman.txt @@ -110,6 +110,13 @@ PROPERTIES Usage: required Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt +- fsl,erratum-a050385 + Usage: optional + Value type: boolean + Definition: A boolean property. Indicates the presence of erratum + A050385, where DMA transactions that are split can result in a + FMan lockup. + ============================================================================= FMan MURAM Node diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b4de21867b965051410cb29ea0a3ff45b51302d5..1ca9556ec849d41f31b0e05c5de026b4e38921d3 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -235,8 +235,8 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "en hide up to all remaining free space. The actual space that would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable This space is reclaimed once checkpoint=enable. -compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" - and "lz4" algorithm. +compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo", + "lz4" and "zstd" algorithms. compress_log_size=%u Support configuring compress cluster size, the size will be 4KB * (1 << %u), 16KB is minimum size, also it's default size. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 93e0a24045322b3e2417e024821520c9996eb6fc..c757c1c3cb814b2b1f5c565fd1c6551fa05d3b46 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -606,3 +606,10 @@ in your dentry operations instead. dentry separately, and it now has request_mask and query_flags arguments to specify the fields and sync type requested by statx. Filesystems not supporting any statx-specific features may ignore the new arguments. +-- +[mandatory] + + [should've been added in 2016] stale comment in finish_open() + notwithstanding, failure exits in ->atomic_open() instances should + *NOT* fput() the file, no matter what. Everything is handled by the + caller. diff --git a/Documentation/sound/hd-audio/index.rst b/Documentation/sound/hd-audio/index.rst index f8a72ffffe66f49560a2f5cdb9ae61243824098f..6e12de9fc34e29eb168e33ac2ec53a3955ad9fc3 100644 --- a/Documentation/sound/hd-audio/index.rst +++ b/Documentation/sound/hd-audio/index.rst @@ -8,3 +8,4 @@ HD-Audio models controls dp-mst + realtek-pc-beep diff --git a/Documentation/sound/hd-audio/realtek-pc-beep.rst b/Documentation/sound/hd-audio/realtek-pc-beep.rst new file mode 100644 index 0000000000000000000000000000000000000000..be47c6f76a6e9fefed4a94c7b44bfea035d830ab --- /dev/null +++ b/Documentation/sound/hd-audio/realtek-pc-beep.rst @@ -0,0 +1,129 @@ +=============================== +Realtek PC Beep Hidden Register +=============================== + +This file documents the "PC Beep Hidden Register", which is present in certain +Realtek HDA codecs and controls a muxer and pair of passthrough mixers that can +route audio between pins but aren't themselves exposed as HDA widgets.
As far +as I can tell, these hidden routes are designed to allow flexible PC Beep output +for codecs that don't have mixer widgets in their output paths. Why it's easier +to hide a mixer behind an undocumented vendor register than to just expose it +as a widget, I have no idea. + +Register Description +==================== + +The register is accessed via processing coefficient 0x36 on NID 20h. Bits not +identified below have no discernible effect on my machine, a Dell XPS 13 9350:: + + MSB LSB + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | |h|S|L| | B |R| | Known bits + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + |0|0|1|1| 0x7 |0|0x0|1| 0x7 | Reset value + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +1Ah input select (B): 2 bits + When zero, expose the PC Beep line (from the internal beep generator, when + enabled with the Set Beep Generation verb on NID 01h, or else from the + external PCBEEP pin) on the 1Ah pin node. When nonzero, expose the headphone + jack (or possibly Line In on some machines) input instead. If PC Beep is + selected, the 1Ah boost control has no effect. + +Amplify 1Ah loopback, left (L): 1 bit + Amplify the left channel of 1Ah before mixing it into outputs as specified + by h and S bits. Does not affect the level of 1Ah exposed to other widgets. + +Amplify 1Ah loopback, right (R): 1 bit + Amplify the right channel of 1Ah before mixing it into outputs as specified + by h and S bits. Does not affect the level of 1Ah exposed to other widgets. + +Loopback 1Ah to 21h [active low] (h): 1 bit + When zero, mix 1Ah (possibly with amplification, depending on L and R bits) + into 21h (headphone jack on my machine). Mixed signal respects the mute + setting on 21h. + +Loopback 1Ah to 14h (S): 1 bit + When one, mix 1Ah (possibly with amplification, depending on L and R bits) + into 14h (internal speaker on my machine). Mixed signal **ignores** the mute + setting on 14h and is present whenever 14h is configured as an output. + +Path diagrams +============= + +1Ah input selection (DIV is the PC Beep divider set on NID 01h):: + + + | | | + +--DIV--+--!DIV--+ {1Ah boost control} + | | + +--(b == 0)--+--(b != 0)--+ + | + >1Ah (Beep/Headphone Mic/Line In)< + +Loopback of 1Ah to 21h/14h:: + + <1Ah (Beep/Headphone Mic/Line In)> + | + {amplify if L/R} + | + +-----!h-----+-----S-----+ + | | + {21h mute control} | + | | + >21h (Headphone)< >14h (Internal Speaker)< + +Background +========== + +All Realtek HDA codecs have a vendor-defined widget with node ID 20h which +provides access to a bank of registers that control various codec functions. +Registers are read and written via the standard HDA processing coefficient +verbs (Set/Get Coefficient Index, Set/Get Processing Coefficient). The node is +named "Realtek Vendor Registers" in public datasheets' verb listings and, +apart from that, is entirely undocumented. + +This particular register, exposed at coefficient 0x36 and named in commits from +Realtek, is of note: unlike most registers, which seem to control detailed +amplifier parameters not in scope of the HDA specification, it controls audio +routing which could just as easily have been defined using standard HDA mixer +and selector widgets. 
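As a practical aside (not part of the original register description), poking this register only needs the coefficient helpers that already live in ``sound/pci/hda/patch_realtek.c``; the sketch below is a hedged illustration, and the mask/value handed to the update helper are placeholders rather than a recommended setting::

	/* Illustrative only: dump and read-modify-write the hidden
	 * register (processing coefficient 0x36 behind NID 20h) from
	 * code living in sound/pci/hda/patch_realtek.c. */
	static void alc_probe_pc_beep_register(struct hda_codec *codec)
	{
		unsigned int val = alc_read_coef_idx(codec, 0x36);

		codec_dbg(codec, "PC beep hidden register: 0x%04x\n", val);

		/* Placeholder mask/value: the real bits to touch are the
		 * h/S/L/R/B fields described above. */
		alc_update_coef_idx(codec, 0x36, 0x0 /* mask */, 0x0 /* set */);
	}

From userspace, the same read can be done with ``hda-verb`` by sending verb 0x500 (set coefficient index) with parameter 0x36 to NID 0x20, then verb 0xc00 (get processing coefficient).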
+ +Specifically, it selects between two sources for the input pin widget with Node +ID (NID) 1Ah: the widget's signal can come either from an audio jack (on my +laptop, a Dell XPS 13 9350, it's the headphone jack, but comments in Realtek +commits indicate that it might be a Line In on some machines) or from the PC +Beep line (which is itself multiplexed between the codec's internal beep +generator and external PCBEEP pin, depending on whether the beep generator is +enabled via verbs on NID 01h). Additionally, it can mix (with optional +amplification) that signal onto the 21h and/or 14h output pins. + +The register's reset value is 0x3717, corresponding to PC Beep on 1Ah that is +then amplified and mixed into both the headphones and the speakers. Not only +does this violate the HDA specification, which says that "[a vendor defined +beep input pin] connection may be maintained *only* while the Link reset +(**RST#**) is asserted", it means that we cannot ignore the register if we care +about the input that 1Ah would otherwise expose or if the PCBEEP trace is +poorly shielded and picks up chassis noise (both of which are the case on my +machine). + +Unfortunately, there are lots of ways to get this register configuration wrong. +Linux, it seems, has gone through most of them. For one, the register resets +after S3 suspend: judging by existing code, this isn't the case for all vendor +registers, and it's led to some fixes that improve behavior on cold boot but +don't last after suspend. Other fixes have successfully switched the 1Ah input +away from PC Beep but have failed to disable both loopback paths. On my +machine, this means that the headphone input is amplified and looped back to +the headphone output, which uses the exact same pins! As you might expect, this +causes terrible headphone noise, the character of which is controlled by the +1Ah boost control. (If you've seen instructions online to fix XPS 13 headphone +noise by changing "Headphone Mic Boost" in ALSA, now you know why.) + +The information here has been obtained through black-box reverse engineering of +the ALC256 codec's behavior and is not guaranteed to be correct. It likely +also applies to the ALC255, ALC257, ALC235, and ALC236, since those codecs +seem to be close relatives of the ALC256. (They all share one initialization +function.) Additionally, other codecs like the ALC225 and ALC285 also have this +register, judging by existing fixups in ``patch_realtek.c``, but specific +data (e.g. node IDs, bit positions, pin mappings) for those codecs may differ +from what I've described here. diff --git a/Documentation/usb/raw-gadget.rst b/Documentation/usb/raw-gadget.rst new file mode 100644 index 0000000000000000000000000000000000000000..9e78cb858f861b7ca52b987fcaed9942f3c2610c --- /dev/null +++ b/Documentation/usb/raw-gadget.rst @@ -0,0 +1,61 @@ +============== +USB Raw Gadget +============== + +USB Raw Gadget is a kernel module that provides a userspace interface for +the USB Gadget subsystem. Essentially, it allows USB devices to be emulated +from userspace. Enabled with CONFIG_USB_RAW_GADGET. Raw Gadget is +currently a strictly debugging feature and shouldn't be used in +production; use GadgetFS instead. + +Comparison to GadgetFS +~~~~~~~~~~~~~~~~~~~~~~ + +Raw Gadget is similar to GadgetFS, but provides lower-level and more +direct access to the USB Gadget layer for userspace. The key +differences are: + +1.
Every USB request is passed to userspace to get a response, while + GadgetFS responds to some USB requests internally based on the provided + descriptors. Note, however, that the UDC driver might respond to some + requests on its own and never forward them to the Gadget layer. + +2. GadgetFS performs some sanity checks on the provided USB descriptors, + while Raw Gadget allows you to provide arbitrary data as responses to + USB requests. + +3. Raw Gadget provides a way to select a UDC device/driver to bind to, + while GadgetFS currently binds to the first available UDC. + +4. Raw Gadget uses predictable endpoint names (handles) across different + UDCs (as long as UDCs have enough endpoints of each required transfer + type). + +5. Raw Gadget has an ioctl-based interface instead of a filesystem-based one. + +Userspace interface +~~~~~~~~~~~~~~~~~~~ + +To create a Raw Gadget instance, open /dev/raw-gadget. Multiple raw-gadget +instances (bound to different UDCs) can be used at the same time. The +interaction with the opened file happens through ioctl() calls; see +comments in include/uapi/linux/usb/raw_gadget.h for details. + +The typical usage of Raw Gadget looks like: + +1. Open a Raw Gadget instance via /dev/raw-gadget. +2. Initialize the instance via USB_RAW_IOCTL_INIT. +3. Launch the instance with USB_RAW_IOCTL_RUN. +4. In a loop, issue USB_RAW_IOCTL_EVENT_FETCH calls to receive events from + Raw Gadget and react to those depending on what kind of USB device + needs to be emulated. + +Potential future improvements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Implement ioctls for setting/clearing halt status on endpoints. + +- Reporting more events (suspend, resume, etc.) through + USB_RAW_IOCTL_EVENT_FETCH. + +- Support O_NONBLOCK I/O. diff --git a/MAINTAINERS b/MAINTAINERS index 621734fe36030f5fff3bdad376613c4c8605dee5..858c3a81d063dd29c01dc48f4c73b964315aad0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6899,7 +6899,7 @@ M: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> M: Rodrigo Vivi <rodrigo.vivi@intel.com> L: intel-gfx@lists.freedesktop.org W: https://01.org/linuxgraphics/ -B: https://01.org/linuxgraphics/documentation/how-report-bugs +B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs C: irc://chat.freenode.net/intel-gfx Q: http://patchwork.freedesktop.org/project/intel-gfx/ T: git git://anongit.freedesktop.org/drm-intel diff --git a/Makefile b/Makefile index cb179bde8d09de43a7d26ae4d3611c7d01046ddf..a75d57b348d9db88015b1b6c83ff8a96e17c1ed9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 171 +SUBLEVEL = 180 EXTRAVERSION = NAME = Petit Gorille @@ -875,7 +875,7 @@ LD_FLAGS_LTO_CLANG := -mllvm -import-instr-limit=5 KBUILD_LDFLAGS += $(LD_FLAGS_LTO_CLANG) KBUILD_LDFLAGS_MODULE += $(LD_FLAGS_LTO_CLANG) -KBUILD_LDS_MODULE += $(srctree)/scripts/module-lto.lds +KBUILD_LDFLAGS_MODULE += -T scripts/module-lto.lds # allow disabling only clang LTO where needed DISABLE_LTO_CLANG := -fno-lto @@ -883,11 +883,11 @@ export DISABLE_LTO_CLANG endif ifdef CONFIG_LTO -lto-flags := $(lto-clang-flags) -KBUILD_CFLAGS += $(lto-flags) +LTO_CFLAGS := $(lto-clang-flags) +KBUILD_CFLAGS += $(LTO_CFLAGS) DISABLE_LTO := $(DISABLE_LTO_CLANG) -export DISABLE_LTO +export LTO_CFLAGS DISABLE_LTO # LDFINAL_vmlinux and LDFLAGS_FINAL_vmlinux can be set to override # the linker and flags for vmlinux_link.
@@ -913,12 +913,12 @@ endif ifdef CONFIG_CFI # cfi-flags are re-tested in prepare-compiler-check -cfi-flags := $(cfi-clang-flags) -KBUILD_CFLAGS += $(cfi-flags) +CFI_CFLAGS := $(cfi-clang-flags) +KBUILD_CFLAGS += $(CFI_CFLAGS) DISABLE_CFI := $(DISABLE_CFI_CLANG) DISABLE_LTO += $(DISABLE_CFI) -export DISABLE_CFI +export CFI_CFLAGS DISABLE_CFI endif ifdef CONFIG_SHADOW_CALL_STACK diff --git a/arch/Kconfig b/arch/Kconfig index eceadb83be66907a50e35ba76edf504f2251486f..82437946208dce5ef9e3fd5a855b11c493d949e7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -650,7 +650,6 @@ config LTO_CLANG depends on !KASAN select LTO select THIN_ARCHIVES - select LD_DEAD_CODE_DATA_ELIMINATION help This option enables clang's Link Time Optimization (LTO), which allows the compiler to optimize the kernel globally at link time. If you diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index b29f1a9fd6f7359957389d5cd30725b7a449f579..07c8e1a6c56e25dd40463107809e40491baac77c 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -14,6 +14,8 @@ #ifdef __ASSEMBLY__ #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d93c1347d97d51733f35a55a89dc3cbd53bb660d..48ebd0d1a0a709bc601b8a357aece1019293b667 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1572,12 +1572,10 @@ config THUMB2_KERNEL bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K default y if CPU_THUMBONLY - select ARM_ASM_UNIFIED select ARM_UNWIND help By enabling this option, the kernel will be compiled in - Thumb-2 mode. A compiler/assembler that understand the unified - ARM-Thumb syntax is needed. + Thumb-2 mode. If unsure, say N. @@ -1612,9 +1610,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11 Unless you are sure your tools don't have this problem, say Y. 
-config ARM_ASM_UNIFIED - bool - config ARM_PATCH_IDIV bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" depends on CPU_32v7 && !XIP_KERNEL @@ -2113,7 +2108,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) - depends on !CPU_V7M + depends on MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 6f71a32967c05b41f5483922e4bddd0a6396a5b4..975d4d29e2e1fd0c931918d2f65b80d9afa93101 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -129,9 +129,11 @@ ifeq ($(cc-name),clang) CFLAGS_ABI += -meabi gnu endif +# Accept old syntax despite ".syntax unified" +AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) + ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) -AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb # Work around buggy relocation from gas if requested: @@ -139,7 +141,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y) CFLAGS_MODULE +=-fno-optimize-sibling-calls endif else -CFLAGS_ISA :=$(call cc-option,-marm,) +CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) endif diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index b8565fc33eea6bc18b88ab72cf8774b15f3c678d..e5f2cca86f04483c428869ee1e82ae65373d684b 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -118,6 +118,7 @@ &sdhci { #address-cells = <1>; #size-cells = <0>; + pinctrl-names = "default"; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; mmc-pwrseq = <&wifi_pwrseq>; non-removable; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index fdb018e1278fb51820f97510241e349591caa79e..9d1e1061d8af1f8a9fbc3f92911ca9cfbe3649f9 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -454,6 +454,7 @@ "dsi0_ddr2", "dsi0_ddr"; + status = "disabled"; }; thermal: thermal@7e212000 { diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index fec965009b9fc5ed9a747695a9215e046bde5aef..f271c564d57d74f8e585f94662c9144468c5e13d 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -137,6 +137,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0x0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2"; reg = <0x0 0x44000000 0x0 0x1000000>, <0x0 0x45000000 0x0 0x1000>; @@ -302,6 +303,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x20013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; @@ -356,6 +358,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x30013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index 1c88c581ff18879a4991f3a77634e3b998d56b3d..78d58b8af67e85fac505a697e858da29058f3751 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -17,3 +17,8 @@ &crossbar_mpu { 
ti,irqs-skip = <10 67 68 133 139 140>; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 849eb3443cde2712705bbbe6c74679ddb81095a8..719e63092c2ea0911f2fcf8b19cb7b04d4f9a8b6 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -587,7 +587,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; status = "okay"; @@ -598,7 +598,7 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; status = "okay"; @@ -1001,7 +1001,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059 - MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040 MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040 >; }; @@ -1014,7 +1013,6 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 - MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040 MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040 >; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 1343c86988c54d2653bb39bc9e3ca2cf7629f69e..68f4482c35e2df127f45f4da276223683cc23edf 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -562,7 +562,7 @@ }; mdio0: mdio@2d24000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -570,7 +570,7 @@ }; mdio1: mdio@2d64000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index eaff2a5751ddc11c5b1ec901a60c6f7c671aabac..bc3f53c79e9daa0c3748951e257639439b1d2829 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -131,6 +131,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0 0 0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2", "l3_main_3"; reg = <0 0x44000000 0 0x2000>, <0 0x44800000 0 0x3000>, diff --git a/arch/arm/boot/dts/ox810se.dtsi b/arch/arm/boot/dts/ox810se.dtsi index 46aa6db8353ac3bc1dfa9f2af943d0fee15551c7..3d2f91234f1a7e212bb308d67ce465713ccc372e 100644 --- a/arch/arm/boot/dts/ox810se.dtsi +++ b/arch/arm/boot/dts/ox810se.dtsi @@ -322,8 +322,8 @@ interrupt-controller; reg = <0 0x200>; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index 459207536a46654246517adfdd62cd6a38629202..8355cb0345255fbb25f2a07c8a3e63ebb4f13c1b 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -239,8 +239,8 @@ reg = <0 0x200>; interrupts = ; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index 8ee0b2ca5d39a26556b570c6e529c1eeab2f42f3..2face089d65b90cc82322c4e4c99f36d42d0ec41 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ 
b/arch/arm/boot/dts/r8a7779.dtsi @@ -67,6 +67,14 @@ <0xf0000100 0x100>; }; + timer@f0000200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0xf0000200 0x100>; + interrupts = ; + clocks = <&cpg_clocks R8A7779_CLK_ZS>; + }; + timer@f0000600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xf0000600 0x20>; diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index a91ae499614cbb5e1c23a646a04082026bec061f..2c3b952be63ebbc536d767798eb21dfba153d25b 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -20,8 +20,10 @@ #ifndef __ASM_UNIFIED_H #define __ASM_UNIFIED_H -#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED) +#if defined(__ASSEMBLY__) .syntax unified +#else +__asm__(".syntax unified"); #endif #ifdef CONFIG_CPU_V7M @@ -64,77 +66,4 @@ #endif /* CONFIG_THUMB2_KERNEL */ -#ifndef CONFIG_ARM_ASM_UNIFIED - -/* - * If the unified assembly syntax isn't used (in ARM mode), these - * macros expand to an empty string - */ -#ifdef __ASSEMBLY__ - .macro it, cond - .endm - .macro itt, cond - .endm - .macro ite, cond - .endm - .macro ittt, cond - .endm - .macro itte, cond - .endm - .macro itet, cond - .endm - .macro itee, cond - .endm - .macro itttt, cond - .endm - .macro ittte, cond - .endm - .macro ittet, cond - .endm - .macro ittee, cond - .endm - .macro itett, cond - .endm - .macro itete, cond - .endm - .macro iteet, cond - .endm - .macro iteee, cond - .endm -#else /* !__ASSEMBLY__ */ -__asm__( -" .macro it, cond\n" -" .endm\n" -" .macro itt, cond\n" -" .endm\n" -" .macro ite, cond\n" -" .endm\n" -" .macro ittt, cond\n" -" .endm\n" -" .macro itte, cond\n" -" .endm\n" -" .macro itet, cond\n" -" .endm\n" -" .macro itee, cond\n" -" .endm\n" -" .macro itttt, cond\n" -" .endm\n" -" .macro ittte, cond\n" -" .endm\n" -" .macro ittet, cond\n" -" .endm\n" -" .macro ittee, cond\n" -" .endm\n" -" .macro itett, cond\n" -" .endm\n" -" .macro itete, cond\n" -" .endm\n" -" .macro iteet, cond\n" -" .endm\n" -" .macro iteee, cond\n" -" .endm\n"); -#endif /* __ASSEMBLY__ */ - -#endif /* CONFIG_ARM_ASM_UNIFIED */ - #endif /* !__ASM_UNIFIED_H */ diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index f4dd7f9663c10a704d3858de6beb538e5023cf78..0001742c131d965908fa7c9572482862dd269eba 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -103,6 +103,8 @@ static bool __init cntvct_functional(void) * this. 
*/ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); if (!np) goto out_put; diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 8ff71058207dc1a43939d985f9781d68219d73cd..1d0923b4a82b38db31850e8ba1ff277b629bf39b 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -87,6 +87,10 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +ifeq ($(CONFIG_ARM_CPU_SUSPEND),y) +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o +endif obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index b09a2ec192671f6e496d47178210d2ac706026c1..4b318c86444686199bf7430813b8727c76ec3a4a 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -111,17 +111,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 0000000000000000000000000000000000000000..5bd1ba7ef15b61cb98d1bd2d3af4e85acc5ec632 --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 76ee2ceec8d546d0ca3f51aebcb8f40509213ba6..7d84b617af4815817d3ffbb60b8c067bcbb8a41b 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -333,17 +333,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index b18fb70c5dcf9b1b2cc6338ee8f79507e43376f7..e13aca6e6d4b1e067c449a6c6810212f9980c108 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -796,7 +796,11 @@ static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, } /* Do LSR operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for LSR. To shift by 0, don't do anything. 
+ */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx); @@ -829,7 +833,11 @@ static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, } /* Do ARSH operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for ASR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx); diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi index 169e171407a631cef269be07a5836dcfabb3aae9..acd205ef329f7da400ba35aae6206bba98bd9938 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi @@ -21,6 +21,8 @@ }; &fman0 { + fsl,erratum-a050385; + /* these aliases provide the FMan ports mapping */ enet0: ethernet@e0000 { }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index 3dc0c8e9663d435a00c938efbf11385c63c9db09..3aead63e54755a4777acb6069b74726f9b1fc9e4 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -155,12 +155,12 @@ ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts index 5dc2782e2a58e785c8ec0263d2f2faafe9c07987..e775e59d03702c8462b2c0bbe52064e7f6310414 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts @@ -162,12 +162,12 @@ &fman0 { ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 6f372ec055dd310a51757cb6274d00abf864d79f..da2949586c7a3bcc513f2be2e0ce23f7366815da 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -788,6 +788,8 @@ interrupts = <0 138 0>; phys = <&hsusb_phy2>; phy-names = "usb2-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -817,6 +819,8 @@ interrupts = <0 131 0>; phys = <&hsusb_phy1>, <&ssusb_phy_0>; phy-names = "usb2-phy", "usb3-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; }; diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 1442ac34d261e1eaa4833c363544ba3144a0dc6a..ba7cfa923d700d160aecd1e22f85f37cc501820e 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -12,7 +12,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set @@ -29,7 +28,6 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set # CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set 
CONFIG_SGETMASK_SYSCALL=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_KALLSYMS_ALL=y @@ -40,7 +38,6 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB_MERGE_DEFAULT is not set CONFIG_PROFILING=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_LTO_CLANG=y @@ -307,6 +304,8 @@ CONFIG_SERIAL_DEV_BUS=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_CAVIUM is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_I2C_COMPAT is not set # CONFIG_I2C_HELPER_AUTO is not set @@ -360,6 +359,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y @@ -447,6 +447,7 @@ CONFIG_SDCARD_FS=y CONFIG_PSTORE=y CONFIG_PSTORE_CONSOLE=y CONFIG_PSTORE_RAM=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_MUST_CHECK is not set @@ -459,6 +460,7 @@ CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DETECT_HUNG_TASK is not set CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y +CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 5d08ab07df33b4f80db1b29c87c133ff64e0f05f..4ed869845a23bdf889aaaf2e326667f7e0201c88 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -37,7 +37,7 @@ void apply_alternatives(void *start, size_t length); " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ -#define ALTINSTR_ENTRY_CB(feature,cb) \ +#define ALTINSTR_ENTRY_CB(feature, cb) \ " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ " .hword " __stringify(feature) "\n" /* feature bit */ \ @@ -83,7 +83,7 @@ void apply_alternatives(void *start, size_t length); oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY_CB(feature,cb) \ + ALTINSTR_ENTRY_CB(feature, cb) \ ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ @@ -216,7 +216,7 @@ alternative_endif .macro user_alt, label, oldinstr, newinstr, cond 9999: alternative_insn "\oldinstr", "\newinstr", \cond - _ASM_EXTABLE 9999b, \label + _asm_extable 9999b, \label .endm /* diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 2d04e37fc114bddf6c610c9eba7c5968f88aced6..bbd30a860077e50352d3c12c8f0a1c21045b1280 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -227,7 +227,7 @@ static inline unsigned long kaslr_offset(void) ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) #define untagged_addr(addr) ({ \ - u64 __addr = (__force u64)addr; \ + u64 __addr = (__force u64)(addr); \ __addr &= __untagged_addr(__addr); \ (__force __typeof__(addr))__addr; \ }) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 60d02c81a3a2c02a6d41de1721a529a4881b507e..6b9c3025c8179c0aa2cb0629ec2e85116bd28864 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -32,6 +32,10 @@ extern void __cpu_copy_user_page(void *to, const void *from, extern void copy_page(void *to, const void *from); extern void clear_page(void *to); +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + #define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, 
vaddr) #define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ea73bdb278fdf521b81bb9c71b354006c2864520..fed87bc3177561810772efea584638695ca00df0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -60,7 +60,9 @@ #ifndef CONFIG_BROKEN_GAS_INST #ifdef __ASSEMBLY__ -#define __emit_inst(x) .inst (x) +// The space separator is omitted so that __emit_inst(x) can be parsed as +// either an assembler directive or an assembler macro argument. +#define __emit_inst(x) .inst(x) #else #define __emit_inst(x) ".inst " __stringify((x)) "\n\t" #endif diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index a4dc115d765941afde81e9feed053d994e038fbe..092046704cbc90f620442f309d468ca0a48a3f5a 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -607,7 +607,7 @@ static struct undef_hook setend_hooks[] = { }, { /* Thumb mode */ - .instr_mask = 0x0000fff7, + .instr_mask = 0xfffffff7, .instr_val = 0x0000b650, .pstate_mask = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK), .pstate_val = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR), diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index fcf1a156ae5ab97ec2a820e8e42e94481edd89f5..4a5eb4c57fe45db746efb494f9ff3def954ea63e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -41,9 +41,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap); #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -1233,17 +1231,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = { {}, }; -#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ - { \ - .desc = #cap, \ - .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ + +#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .sign = s, \ .min_field_value = min_value, \ + +#define __HWCAP_CAP(name, cap_type, cap) \ + .desc = name, \ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ .hwcap_type = cap_type, \ .hwcap = cap, \ + +#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + } + +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ } static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { @@ -1276,8 +1287,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. 
+ */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f4fdf6420ac5ca54145c004a223c0d502faaabd4..4cd962f6c4302a8763847f4c2178631709342a97 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -206,8 +206,19 @@ void fpsimd_preserve_current_state(void) */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). 
+ */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } local_bh_disable(); @@ -229,7 +240,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct fpsimd_state *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; local_bh_disable(); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index e0a80e230c5ee887e760e6fe3c23f934c4743ab5..e322a49a79a3376e38fda0762f7c962805dc34ed 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -264,12 +264,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index aa1d6377f992289459b67865a065a24fe3b43779..423a02bf0cde64df64de43c385df1b22e570373b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -433,6 +433,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. 
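 * With PSTATE.SSBS clear, speculative store bypass stays disabled for
 * the incoming task.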
*/ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 242527f29c410549ef75b9b6875e3d5ff3a28855..e8574b95bda8693a2652ed240ebe8448d19691be 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -624,6 +624,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -634,6 +641,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -648,6 +658,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state newstate = target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) return ret; @@ -740,6 +753,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -819,6 +833,7 @@ static int compat_gpr_get(struct task_struct *target, break; case 16: reg = task_pt_regs(target)->pstate; + reg = pstate_to_compat_psr(reg); break; case 17: reg = task_pt_regs(target)->orig_x0; @@ -886,6 +901,7 @@ static int compat_gpr_set(struct task_struct *target, newregs.pc = reg; break; case 16: + reg = compat_psr_to_pstate(reg); newregs.pstate = reg; break; case 17: @@ -914,6 +930,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; if (target == current) @@ -947,6 +966,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1004,6 +1026,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e09bf5d1560606405e25175d69d8b67929f02657..3832750cee8b49d11a7a808cb2c30d8da780ef93 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -321,6 +321,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, int err; sigset_t set; struct compat_aux_sigframe __user *aux; + unsigned long psr; err = get_sigset_t(&set, &sf->uc.uc_sigmask); if (err == 0) { @@ -344,7 +345,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, __get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __get_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); + + regs->pstate = 
compat_psr_to_pstate(psr); /* * Avoid compat_sys_sigreturn() restarting. @@ -500,6 +503,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { struct compat_aux_sigframe __user *aux; + unsigned long psr = pstate_to_compat_psr(regs->pstate); int err = 0; __put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); @@ -518,7 +522,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __put_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index da2d254004d3fb9650b59c5ad7fe6786f0f0c9a3..379ff1366ef2c1b320dd1bbbc1b61217b59da296 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -949,11 +949,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + void smp_send_stop(void) { unsigned long timeout; - if (num_online_cpus() > 1) { + if (num_other_online_cpus()) { cpumask_t mask; cpumask_copy(&mask, cpu_online_mask); @@ -990,13 +1001,17 @@ void crash_smp_send_stop(void) cpus_stopped = 1; - if (num_online_cpus() == 1) + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. 
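+ * Using num_other_online_cpus() keeps this check correct regardless of
+ * whether the panicking CPU itself is still counted as online.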
+ */ + if (num_other_online_cpus() == 0) return; cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_CRASH_STOP); diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 0bde47e4fa694264256c1e8d2498436acc66320b..dcba53803fa5f9867eba2a0178c48ee5ed924c20 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void) #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \ do { \ int align = ~(cache_line_length - 1); \ - end = min(start + cache_size, end); \ + if (start < UINT_MAX - cache_size) \ + end = min(start + cache_size, end); \ start &= align; \ } while (0) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index b3aec101a65d4ed96f4f986ea22c2210b297cdef..a27b3d70393f7927c2206113ce49164e79c44c53 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2199,6 +2199,9 @@ static int octeon_irq_cib_map(struct irq_domain *d, } cd = kzalloc(sizeof(*cd), GFP_KERNEL); + if (!cd) + return -ENOMEM; + cd->host_data = host_data; cd->bit = hw; diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 544ea21bfef9cdae6361c964d5cdd91c56f57c88..b2683aca401f61f84770317be2358065100f567a 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c index 25a97cc0ee33664f79f16e3ffb4d350a4b033dae..0db4cc3196ebdcc92b98791e139f652636b83c9f 100644 --- a/arch/mips/loongson64/loongson-3/platform.c +++ b/arch/mips/loongson64/loongson-3/platform.c @@ -31,6 +31,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 197ced1eaaa0932d59d214a469e25f53b79fa483..4a16115b47eb06bb0fc54fc8e775ae9499090a37 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -108,6 +108,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int hash__has_transparent_hugepage(void); #endif +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + BUG(); + return pmd; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 8d40cf03cb67d10a691bc2d0b92071dbccc0c8e8..2194866225f84b2422160a3d5b335e2e1fa37cb0 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -181,7 +181,7 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, */ static inline int hash__pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE 
| _PAGE_DEVMAP)) == (_PAGE_PTE | H_PAGE_THP_HUGE)); } @@ -209,6 +209,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 4dd13b503dbbd8f265f271d4bcf7cd3761e84701..bcb79a96a6c836bd9b2c81ceb3579ed62e96befd 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1179,7 +1179,9 @@ extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); + if (radix_enabled()) + return radix__pmd_mkdevmap(pmd); + return hash__pmd_mkdevmap(pmd); } static inline int pmd_devmap(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 19c44e1495aefe8b1170385f87e0877dedb92341..7a1fc49aaf99f36e9254a97e0e561010ee0fa720 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -289,6 +289,11 @@ extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int radix__has_transparent_hugepage(void); #endif +static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); +} + extern int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h index 279d03a1eec625b12d93632ce7586796835bee97..6941fe202bc822b57ef115d2e51d97c8ed83e576 100644 --- a/arch/powerpc/include/asm/setjmp.h +++ b/arch/powerpc/include/asm/setjmp.h @@ -12,7 +12,9 @@ #define JMP_BUF_LEN 23 -extern long setjmp(long *); -extern void longjmp(long *, long); +typedef long jmp_buf[JMP_BUF_LEN]; + +extern int setjmp(jmp_buf env) __attribute__((returns_twice)); +extern void longjmp(jmp_buf env, int val) __attribute__((noreturn)); #endif /* _ASM_POWERPC_SETJMP_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 5607ce67d178b14c055f8918a18cae97ea19fbf8..681f966b7211d0bd8af7c750ddaf7bc527e37898 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,9 +5,6 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Avoid clang warnings around longjmp/setjmp declarations -CFLAGS_crash.o += -ffreestanding - subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ifeq ($(CONFIG_PPC64),y) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index da4b0e37923802dad23a3dfe7316eb8b7d4e2cef..6ef41e823013fed8bf60c7f31d08cea2a588b5e7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2232,11 +2232,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Furthermore, let's ensure that the + * fix for the PMAO bug is enabled in compatibility mode.
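+ * CPU_FTR_PMAO_BUG describes the real hardware, so the bit has to be
+ * carried over from the cpu_spec selected for the real PVR.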
*/ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 470284f9e4f6661f6716b0be1a78b14bae9f33b3..5a48c93aaa1b35b6bacb326fb65976ff8541af0a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -520,12 +520,6 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully. - */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_list, &rmv_data->edev_list); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 74fc2043108261ebc183c270001a826ad846e5ae..01b823bdb49c2a4ff0a66e0e2b45722ba79014a8 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -163,8 +163,11 @@ core_idle_lock_held: bne- core_idle_lock_held blr -/* Reuse an unused pt_regs slot for IAMR */ +/* Reuse some unused pt_regs slots for AMR/IAMR/UAMOR/AMOR */ +#define PNV_POWERSAVE_AMR _TRAP #define PNV_POWERSAVE_IAMR _DAR +#define PNV_POWERSAVE_UAMOR _DSISR +#define PNV_POWERSAVE_AMOR RESULT /* * Pass requested state in r3: @@ -198,8 +201,16 @@ pnv_powersave_common: SAVE_NVGPRS(r1) BEGIN_FTR_SECTION + mfspr r4, SPRN_AMR mfspr r5, SPRN_IAMR + mfspr r6, SPRN_UAMOR + std r4, PNV_POWERSAVE_AMR(r1) std r5, PNV_POWERSAVE_IAMR(r1) + std r6, PNV_POWERSAVE_UAMOR(r1) +BEGIN_FTR_SECTION_NESTED(42) + mfspr r7, SPRN_AMOR + std r7, PNV_POWERSAVE_AMOR(r1) +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfcr r5 @@ -951,12 +962,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_GPR(2, r1) BEGIN_FTR_SECTION - /* IAMR was saved in pnv_powersave_common() */ + /* These regs were saved in pnv_powersave_common() */ + ld r4, PNV_POWERSAVE_AMR(r1) ld r5, PNV_POWERSAVE_IAMR(r1) + ld r6, PNV_POWERSAVE_UAMOR(r1) + mtspr SPRN_AMR, r4 mtspr SPRN_IAMR, r5 + mtspr SPRN_UAMOR, r6 +BEGIN_FTR_SECTION_NESTED(42) + ld r7, PNV_POWERSAVE_AMOR(r1) + mtspr SPRN_AMOR, r7 +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) /* - * We don't need an isync here because the upcoming mtmsrd is - * execution synchronizing. + * We don't need an isync here after restoring IAMR because the upcoming + * mtmsrd is execution synchronizing. */ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 10b46b35c059fe0c01424e3e0b1b1d90260abb76..07d3f3b402463fad208abc9819c40092fe4f3ac7 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -279,6 +279,9 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; + if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0e395afbf0f498a89f2fd66a8f8b6ed74f381f24..0e45a446a8c78794e3035822db96f0d43657d8ee 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -261,9 +261,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF.
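+ * This is the teardown counterpart of the eeh_dev creation done when
+ * SR-IOV was enabled on the parent device.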
The PCI core + has already torn down the pci_dev for this VF, but + we're responsible for removing the eeh_dev since it + has the same lifetime as the pci_dn that spawned it. + */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only the VFs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 6ca1b3a1e19679d394e2610d9e0106b94f3e2746..54e949d5452d55ce44c6f9f0d295845f1d15d5d6 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -82,10 +82,16 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) const __be32 *addrs; u32 i; int proplen; + bool mark_unset = false; addrs = of_get_property(node, "assigned-addresses", &proplen); - if (!addrs) - return; + if (!addrs || !proplen) { + addrs = of_get_property(node, "reg", &proplen); + if (!addrs || !proplen) + return; + mark_unset = true; + } + pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs); for (; proplen >= 20; proplen -= 20, addrs += 5) { flags = pci_parse_of_flags(of_read_number(addrs, 1), 0); @@ -110,6 +116,8 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) continue; } res->flags = flags; + if (mark_unset) + res->flags |= IORESOURCE_UNSET; res->name = pci_name(dev); region.start = base; region.end = base + size - 1; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 41b3b2787f23af9afe041e6c43e31f3f3839e58c..a1e336901cc83acd516065ca40dd4ba7c22d28f6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -466,6 +466,8 @@ static bool __init parse_cache_info(struct device_node *np, lsizep = of_get_property(np, propnames[3], NULL); if (bsizep == NULL) bsizep = lsizep; + if (lsizep == NULL) + lsizep = bsizep; if (lsizep != NULL) lsize = be32_to_cpu(*lsizep); if (bsizep != NULL) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index b75bf6e74209eb60b0a4c854034801a083fead45..3e8edb1387ccfc75f7cf19a058f1bab13247ee3e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -469,8 +469,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); + /* Don't allow userspace to set the trap value */ + regs->trap = 0; + /* These regs are not checkpointed; they can go in 'regs'.
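+ * (PT_TRAP is deliberately excluded: regs->trap was cleared above and
+ * is not read back from the sigcontext.)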
*/ - err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b0cf4af7ba840ea3ac261fb92e8fc4524f17ee40..e4da937d6cf919918277166f80e44c24b9575a34 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -317,6 +317,12 @@ SECTIONS *(.branch_lt) } +#ifdef CONFIG_DEBUG_INFO_BTF + .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { + *(.BTF) + } +#endif + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { *(.opd) } diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 048b8e9f44928ebbe3d9892ae8f7154d56ff1954..63964af9a162ee24ec9a7e1ee0460d5632e54e12 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -400,7 +400,7 @@ _GLOBAL(set_context) * extern void loadcam_entry(unsigned int index) * * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, and r10 + * Must preserve r7, r8, r9, r10 and r11 */ _GLOBAL(loadcam_entry) mflr r5 @@ -436,6 +436,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) */ _GLOBAL(loadcam_multi) mflr r8 + /* Don't switch to AS=1 if already there */ + mfmsr r11 + andi. r11,r11,MSR_IS + bne 10f /* * Set up temporary TLB entry that is the same as what we're @@ -461,6 +465,7 @@ _GLOBAL(loadcam_multi) mtmsr r6 isync +10: mr r9,r3 add r10,r3,r4 2: bl loadcam_entry @@ -469,6 +474,10 @@ _GLOBAL(loadcam_multi) mr r3,r9 blt 2b + /* Don't return to AS=0 if we were in AS=1 at function start */ + andi. r11,r11,MSR_IS + bne 3f + /* Return to AS=0 and clear the temporary entry */ mfmsr r6 rlwinm. r6,r6,0,~(MSR_IS|MSR_DS) @@ -484,6 +493,7 @@ _GLOBAL(loadcam_multi) tlbwe isync +3: mtlr r8 blr #endif diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index b7f937563827d91a3f0e331d1b87f5a7ed536787..d1fee2d35b49c6d24552b52728d37916f8046b2d 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -299,23 +299,6 @@ static int __init maple_probe(void) return 1; } -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = maple_progress, - .power_save = power4_idle, -}; - #ifdef CONFIG_EDAC /* * Register a platform device for CPC925 memory controller on @@ -372,3 +355,20 @@ static int __init maple_cpc925_edac_setup(void) } machine_device_initcall(maple, maple_cpc925_edac_setup); #endif + +define_machine(maple) { + .name = "Maple", + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .init_IRQ = maple_init_IRQ, + .pci_irq_fixup = maple_pci_irq_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .halt = maple_halt, + .get_boot_time = maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = maple_progress, + .power_save = power4_idle, +}; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 
d3d5796f7df603da10143e87d40d1069eacc3f42..36ef504eeab32de5e63e9ba1274edb097ef5d9a8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1523,6 +1523,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1535,13 +1539,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1555,6 +1557,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this PE with its pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 961c131a5b7e8cf0c5a234898dd9df7a6528916a..844ca1886063bd25b6843a7311e5bd80309d20cd 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -978,16 +978,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) struct pnv_phb *phb = hose->private_data; #ifdef CONFIG_PCI_IOV struct pnv_ioda_pe *pe; - struct pci_dn *pdn; /* Fix the VF pdn PE number */ if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); list_for_each_entry(pe, &phb->ioda.pe_list, list) { if (pe->rid == ((pdev->bus->number << 8) | (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; pe->pdev = pdev; break; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index c0ae3847b8db55f7eca8184851b7b99f5a598249..215b14a373cb4a09238a00d312c0b5ddd4546aa7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1060,7 +1060,7 @@ static int __init vpa_debugfs_init(void) { char name[16]; long i; - static struct dentry *vpa_dir; + struct dentry *vpa_dir; if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a820370883d9348c6fa43835b0e6f821f9cb3fc7..b7ae5a027714eabb75002033d431d195b0e13443 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -72,13 +72,6 @@ static u32 xive_ipi_irq; /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); -/* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this - */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) - /* An invalid CPU target */ #define XIVE_INVALID_TARGET (-1) @@ -1073,7 +1066,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc =
per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; /* Grab an IPI from the backend, this will populate xc->hw_ipi */ @@ -1110,7 +1103,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; /* Mask the IPI */ @@ -1266,6 +1259,7 @@ static int xive_prepare_cpu(unsigned int cpu) if (np) xc->chip_id = of_get_ibm_chip_id(np); of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; per_cpu(xive_cpu, cpu) = xc; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index aac61374afebf581cae336b2de48f277f5a5ed8e..30cdcbfa1c04e519720d802194b53330700981eb 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -310,7 +310,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -318,7 +318,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(1); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 7fc41bf30fd594f1c7d9515cf40c2de833e89828..10235098a726660c97eee2c9c7f618487acce0d1 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -443,11 +443,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index f34abed0c05fd3064bf0a2c45fb193b49079283f..48808dbb25dce60df08cf38ae7456619338c26cb 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -9,6 +9,13 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { #ifdef CONFIG_SMP diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index a60c44b4a3e502c63b5c4e70042169f2b3246887..93974b0a5a99ef714c00774f1f8a50650b829a0d 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Avoid clang warnings around longjmp/setjmp declarations -subdir-ccflags-y := -ffreestanding - subdir-ccflags-$(CONFIG_PPC_WERROR) += -Werror GCOV_PROFILE := n diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 779c589b7089c955cf46f8d6dc4e44a5a054fdad..5f2e272895ff9c6841bd9a2eb111b5a7c884e41a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -42,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } diff --git a/arch/s390/include/asm/timex.h 
b/arch/s390/include/asm/timex.h index 2dc9eb4e1acca26f09f5aad4d2cb45a5e19833a2..b6a4ce9dafafbfbbf96cfaab42e4348a5277ca39 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 53a5316cc4b7e557b8eb4b71aa78738d2d2ecf11..4c7cf8787a8489af80b746af4a02cd89f949b9d7 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -79,7 +79,7 @@ static int show_diag_stat(struct seq_file *m, void *v) static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) { - return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL; } static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) @@ -128,7 +128,7 @@ void diag_stat_inc(enum diag_stat_enum nr) } EXPORT_SYMBOL(diag_stat_inc); -void diag_stat_inc_norecursion(enum diag_stat_enum nr) +void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr) { this_cpu_inc(diag_stat.counter[nr]); trace_s390_diagnose_norecursion(diag_map[nr].code); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 27110f3294edcdf30935048d5553f712caf44116..0cfd5a83a1daa9d32127454b5fa18609076b094b 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -25,6 +25,12 @@ ENTRY(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -38,9 +44,16 @@ ENTRY(ftrace_caller) #ifndef CC_USING_HOTPATCH aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 6fe2e1875058bec89419d680bef0110249ba9ed3..675d4be0c2b77f18e53865bc57c05d9244fc3137 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -157,8 +157,9 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n) static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; + unsigned long first = cpumask_first(cpu_online_mask); - if (!n) + if (n == first) show_cpu_summary(m, v); if (!machine_has_cpu_mhz) return 0; @@ -171,6 +172,8 @@ static inline void *c_update(loff_t *pos) { if (*pos) *pos = cpumask_next(*pos - 1, cpu_online_mask); + else + *pos = cpumask_first(cpu_online_mask); return *pos < nr_cpu_ids ? 
(void *)*pos + 1 : NULL; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b649a6538350da6f034200b137ba8138e6cb348c..808f4fbe869e7bc59fed410fbf12b05721e18a3a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -406,7 +406,7 @@ int smp_find_processor_id(u16 address) return -1; } -bool arch_vcpu_is_preempted(int cpu) +bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) return false; @@ -416,7 +416,7 @@ bool arch_vcpu_is_preempted(int cpu) } EXPORT_SYMBOL(arch_vcpu_is_preempted); -void smp_yield_cpu(int cpu) +void notrace smp_yield_cpu(int cpu) { if (MACHINE_HAS_DIAG9C) { diag_stat_inc_norecursion(DIAG_STAT_X09C); diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c index 490b52e85014538937d0c67e73a8bf6bf11c7cf9..11a669f3cc93c17d5f69f281296e04c2a39cc2bb 100644 --- a/arch/s390/kernel/trace.c +++ b/arch/s390/kernel/trace.c @@ -14,7 +14,7 @@ EXPORT_TRACEPOINT_SYMBOL(s390_diagnose); static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth); -void trace_s390_diagnose_norecursion(int diag_nr) +void notrace trace_s390_diagnose_norecursion(int diag_nr) { unsigned long flags; unsigned int *depth; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 28f3796d23c8f884b17c8a299c359197eb1f1564..61d25e2c82efacc0ab02900bf0c94b1e509b2d38 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1913,7 +1913,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2214,7 +2214,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2294,7 +2294,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 061906f98dc5cc95cd3c965535d0dd9cb2083a9e..0120383219c038494ef3b1075dcad48d982d923c 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1027,6 +1027,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->iprcc = PGM_ADDRESSING; scb_s->pgmilc = 4; scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4); + rc = 1; } return rc; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index ec9292917d3f24852ec931c5abb82eecc2e516a2..a29d2e88b00ef7ebb71f5736e8343e182a523446 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -762,14 +762,18 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { + const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table; if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; - if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11))) + + if (asce_type != _ASCE_TYPE_REGION1 && + gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; + table = gmap->table; switch (gmap->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: @@ -1683,6 +1687,7 @@ int gmap_shadow_r3t(struct gmap *sg, 
unsigned long saddr, unsigned long r3t, goto out_free; } else if (*table & _REGION_ENTRY_ORIGIN) { rc = -EAGAIN; /* Race with shadow */ + goto out_free; } crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d488180db7b83bd7bc9cf8c0e80f75f4cf..b887cc402b71255712c54b29b26100f7b24c99fa 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 5a2344574f39bc6ade2e2a8b3f8381f47362643c..4323dc4ae4c7a1a41c896fe1d3ec3789c3f250d1 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -167,12 +167,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fdc42d2bf7d99b91aadc6f00553702cc87279e25..a9b831061e84fd37aec13db40019ceff7b62e442 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -70,6 +70,7 @@ config X86 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_LTO_CLANG if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS @@ -172,7 +173,7 @@ config X86 select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION - select HAVE_STACK_VALIDATION if X86_64 + select HAVE_STACK_VALIDATION if X86_64 && !LTO_CLANG select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 6ccc65edf7e34aedf98904021a315d7f20d40f5e..6449fc9f55d2ce69d0ab1745dfb5e4ae3c468f85 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -236,6 +236,11 @@ ifdef CONFIG_X86_64 LDFLAGS += $(call ld-option, -z max-page-size=0x200000) endif +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ + -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif + # Speed up the build KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 37380c0d59996b8ed688d0e350111343e5dca0ae..01d628ea34024c1a88192a2a19e12920bceaf196 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -106,7 +106,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: diff --git a/arch/x86/boot/compressed/head_64.S 
b/arch/x86/boot/compressed/head_64.S index 39fdede523f216042af652ce8098078b2706cec2..a25127916e67973faa03b06ed4f869beb86e4a68 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -105,7 +105,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: @@ -280,7 +280,7 @@ ENTRY(startup_64) notq %rax andq %rax, %rbp cmpq $LOAD_PHYSICAL_ADDR, %rbp - jge 1f + jae 1f #endif movq $LOAD_PHYSICAL_ADDR, %rbp 1: diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index e691ff734cb5adb5c9e5ff2c0da1c4bd790617ee..46573842d8c3ecb80a278be92ecbcfd91c02d02c 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -36,9 +36,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index d876f450f077f09d4b61b5d2b277d6237135621d..a7f60e3d5ddececa0c1f7b623d0af82700667c14 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -17,7 +17,6 @@ CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y @@ -27,7 +26,10 @@ CONFIG_NAMESPACES=y # CONFIG_PID_NS is not set CONFIG_SCHED_TUNE=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_RD_LZ4 is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set CONFIG_KALLSYMS_ALL=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_BPF_SYSCALL=y @@ -36,9 +38,10 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_LTO_CLANG=y +CONFIG_CFI_CLANG=y CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -324,6 +327,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HPET=y # CONFIG_HPET_MMAP_DEFAULT is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_ACPI_I2C_OPREGION is not set # CONFIG_I2C_COMPAT is not set @@ -380,6 +385,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y @@ -468,6 +474,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set @@ -481,6 +488,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y +CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_TEST_MEMINIT=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c690ddc78c03631f56c84e0ce19d408bc94a630d..6dc0af7f2388e082872f282efaafde30afaaf1ee 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -74,10 +74,8 @@ struct aesni_xts_ctx { asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); -asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 
*in); -asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); +asmlinkage void aesni_enc(void *ctx, u8 *out, const u8 *in); +asmlinkage void aesni_dec(void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len); asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 9f712a7dfd797cc499e1ef52ba7999078530494a..7e578fa5d0a7423f2804a5706d86529736e8a278 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -62,11 +62,11 @@ *Visit http://software.intel.com/en-us/articles/ *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/ * - *Updates 20-byte SHA-1 record in 'hash' for even number of - *'num_blocks' consecutive 64-byte blocks + *Updates 20-byte SHA-1 record at start of 'state', from 'input', for + *even number of 'blocks' consecutive 64-byte blocks. * *extern "C" void sha1_transform_avx2( - * int *hash, const char* input, size_t num_blocks ); + * struct sha1_state *state, const u8* input, int blocks ); */ #include diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 6204bd53528c65c0d4a70f05e76a60bcc476499e..5fc0bf7e6a0303f305a6641ef5df9c754517be24 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -461,9 +461,13 @@ W_PRECALC_SSSE3 movdqu \a,\b .endm -/* SSSE3 optimized implementation: - * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); +/* + * SSSE3 optimized implementation: + * + * extern "C" void sha1_transform_ssse3(struct sha1_state *state, + * const u8 *data, int blocks); + * + * Note that struct sha1_state is assumed to begin with u32 state[5]. */ SHA1_VECTOR_ASM sha1_transform_ssse3 @@ -549,8 +553,8 @@ W_PRECALC_AVX /* AVX optimized implementation: - * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); + * extern "C" void sha1_transform_avx(struct sha1_state *state, + * const u8 *data, int blocks); */ SHA1_VECTOR_ASM sha1_transform_avx diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index fc61739150e7c2c5a484c020b14441551fe5403a..9691962756a7efd998907738974f25278381ed15 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -31,11 +31,8 @@ #include #include -typedef void (sha1_transform_fn)(u32 *digest, const char *data, - unsigned int rounds); - static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha1_transform_fn *sha1_xform) + unsigned int len, sha1_block_fn *sha1_xform) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -43,48 +40,47 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) return crypto_sha1_update(desc, data, len); - /* make sure casting to sha1_block_fn() is safe */ + /* + * Make sure struct sha1_state begins directly with the SHA1 + * 160-bit internal state, as this is what the asm functions expect. 
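+ * The BUILD_BUG_ON below turns any future reordering of struct sha1_state
+ * into a compile-time error instead of silent state corruption.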
+ */ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); kernel_fpu_begin(); - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_xform); + sha1_base_do_update(desc, data, len, sha1_xform); kernel_fpu_end(); return 0; } static int sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) + unsigned int len, u8 *out, sha1_block_fn *sha1_xform) { if (!irq_fpu_usable()) return crypto_sha1_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_xform); - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform); + sha1_base_do_update(desc, data, len, sha1_xform); + sha1_base_do_finalize(desc, sha1_xform); kernel_fpu_end(); return sha1_base_finish(desc, out); } -asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_ssse3(struct sha1_state *state, + const u8 *data, int blocks); static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_transform_ssse3); + return sha1_update(desc, data, len, sha1_transform_ssse3); } static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_transform_ssse3); + return sha1_finup(desc, data, len, out, sha1_transform_ssse3); } /* Add padding and return the message digest. */ @@ -124,21 +120,19 @@ static void unregister_sha1_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha1_transform_avx(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_avx(struct sha1_state *state, + const u8 *data, int blocks); static int sha1_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_transform_avx); + return sha1_update(desc, data, len, sha1_transform_avx); } static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_transform_avx); + return sha1_finup(desc, data, len, out, sha1_transform_avx); } static int sha1_avx_final(struct shash_desc *desc, u8 *out) @@ -196,8 +190,8 @@ static inline void unregister_sha1_avx(void) { } #if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX) #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ -asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks); static bool avx2_usable(void) { @@ -209,28 +203,26 @@ static bool avx2_usable(void) return false; } -static void sha1_apply_transform_avx2(u32 *digest, const char *data, - unsigned int rounds) +static void sha1_apply_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks) { /* Select the optimal transform based on data block size */ - if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) - sha1_transform_avx2(digest, data, rounds); + if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(state, data, blocks); else - sha1_transform_avx(digest, data, rounds); + sha1_transform_avx(state, data, blocks); } static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_apply_transform_avx2); + return sha1_update(desc, data, len, 
sha1_apply_transform_avx2); } static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_apply_transform_avx2); + return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2); } static int sha1_avx2_final(struct shash_desc *desc, u8 *out) @@ -274,21 +266,19 @@ static inline void unregister_sha1_avx2(void) { } #endif #ifdef CONFIG_AS_SHA1_NI -asmlinkage void sha1_ni_transform(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, + int rounds); static int sha1_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_ni_transform); + return sha1_update(desc, data, len, sha1_ni_transform); } static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_ni_transform); + return sha1_finup(desc, data, len, out, sha1_ni_transform); } static int sha1_ni_final(struct shash_desc *desc, u8 *out) diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 001bbcf93c79ba08628abcf30dd49ad77158420d..b6e037ee66613eadc410a956b4aed01be422870f 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -341,8 +341,8 @@ a = TMP_ .endm ######################################################################## -## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_avx(struct sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 1420db15dcddc8505b57faad5e6e6701f972b517..2e6ebc904a3a9fb5ff718d818f0539477b7d8d71 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -520,8 +520,8 @@ STACK_SIZE = _RSP + _RSP_SIZE .endm ######################################################################## -## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index c6c05ed2c16a593390ddf7617070f45e58a40ad4..ab7d9f05ff789399ad1feee5f77c3ea601e201e9 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -347,8 +347,10 @@ a = TMP_ .endm ######################################################################## -## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, +## int blocks); +## arg 1 : pointer to state +## (struct sha256_state is assumed to begin with u32 state[8]) ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index
9e79baf03a4b206ced0e5e2b0a1f055cd0ca73c6..2fd8bdaa66046d62982a669f8827272894c89622 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -40,12 +40,11 @@ #include #include -asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, - u64 rounds); -typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); +asmlinkage void sha256_transform_ssse3(struct sha256_state *state, + const u8 *data, int blocks); -static int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha256_transform_fn *sha256_xform) +static int _sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_block_fn *sha256_xform) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -53,28 +52,29 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) return crypto_sha256_update(desc, data, len); - /* make sure casting to sha256_block_fn() is safe */ + /* + * Make sure struct sha256_state begins directly with the SHA256 + * 256-bit internal state, as this is what the asm functions expect. + */ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); kernel_fpu_begin(); - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_xform); + sha256_base_do_update(desc, data, len, sha256_xform); kernel_fpu_end(); return 0; } static int sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) + unsigned int len, u8 *out, sha256_block_fn *sha256_xform) { if (!irq_fpu_usable()) return crypto_sha256_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_xform); - sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform); + sha256_base_do_update(desc, data, len, sha256_xform); + sha256_base_do_finalize(desc, sha256_xform); kernel_fpu_end(); return sha256_base_finish(desc, out); @@ -83,7 +83,7 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data, static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_ssse3); + return _sha256_update(desc, data, len, sha256_transform_ssse3); } static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, @@ -146,13 +146,13 @@ static void unregister_sha256_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha256_transform_avx(u32 *digest, const char *data, - u64 rounds); +asmlinkage void sha256_transform_avx(struct sha256_state *state, + const u8 *data, int blocks); static int sha256_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_avx); + return _sha256_update(desc, data, len, sha256_transform_avx); } static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, @@ -230,13 +230,13 @@ static inline void unregister_sha256_avx(void) { } #endif #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) -asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, - u64 rounds); +asmlinkage void sha256_transform_rorx(struct sha256_state *state, + const u8 *data, int blocks); static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_rorx); + return _sha256_update(desc, data, len, sha256_transform_rorx); } static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, @@ -312,13 
+312,13 @@ static inline void unregister_sha256_avx2(void) { } #endif #ifdef CONFIG_AS_SHA256_NI -asmlinkage void sha256_ni_transform(u32 *digest, const char *data, - u64 rounds); /*unsigned int rounds);*/ +asmlinkage void sha256_ni_transform(struct sha256_state *digest, + const u8 *data, int rounds); static int sha256_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_ni_transform); + return _sha256_update(desc, data, len, sha256_ni_transform); } static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 39235fefe6f7c0c8e15c39c5828d9ff93e2609d6..8f6fe09cba54601f921811e2a09ca2405d74beaf 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -271,11 +271,12 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_avx(void* D, const void* M, u64 L) -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks +# void sha512_transform_avx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks ######################################################################## ENTRY(sha512_transform_avx) cmp $0, msglen diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index b16d560051629a1c6d518321b93be2672b861c9e..43d4d641804c73752b5c131282f755a167b2dce7 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -563,11 +563,12 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_rorx(void* D, const void* M, uint64_t L)# -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks +# void sha512_transform_rorx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks ######################################################################## ENTRY(sha512_transform_rorx) # Allocate Stack Space diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 66bbd9058a90d67f51a44340e66ddfcd3801ec60..46da903f5538f4d9470154fde12338935328518c 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -269,11 +269,14 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_ssse3(void* D, const void* M, u64 L)# -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks. 
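The switch from bare digest pointers to state-struct pointers is only safe because the digest words live at offset zero of struct sha256_state/sha512_state, which the BUILD_BUG_ON() in each glue file documents and enforces. A standalone sketch of that invariant (the type and transform below are illustrative stand-ins, not the kernel's):

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Illustrative stand-in for the kernel's struct sha256_state. */
	struct sha256_state_sketch {
		uint32_t state[8];	/* digest words, must stay first */
		uint64_t count;
		uint8_t buf[64];
	};

	/* Same shape as the asm entry points: state, data, whole blocks. */
	static void dummy_transform(struct sha256_state_sketch *state,
				    const uint8_t *data, int blocks)
	{
		(void)state; (void)data; (void)blocks;
	}

	int main(void)
	{
		/* Mirrors BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0) */
		static_assert(offsetof(struct sha256_state_sketch, state) == 0,
			      "asm code indexes the digest at offset 0");
		struct sha256_state_sketch s = { .state = { 0x6a09e667 } };
		dummy_transform(&s, (const uint8_t *)"", 0);
		return 0;
	}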
+# void sha512_transform_ssse3(struct sha512_state *state, const u8 *data, +# int blocks); +# (struct sha512_state is assumed to begin with u64 state[8]) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks. ######################################################################## ENTRY(sha512_transform_ssse3) diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 2b0e2a6825f337f2defe72d6a4bc7760ee9b587b..1db6c4ed53cd1893abf009616cd1b50b64192451 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -39,13 +39,11 @@ #include -asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, - u64 rounds); - -typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds); +asmlinkage void sha512_transform_ssse3(struct sha512_state *state, + const u8 *data, int blocks); static int sha512_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha512_transform_fn *sha512_xform) + unsigned int len, sha512_block_fn *sha512_xform) { struct sha512_state *sctx = shash_desc_ctx(desc); @@ -53,28 +51,29 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) return crypto_sha512_update(desc, data, len); - /* make sure casting to sha512_block_fn() is safe */ + /* + * Make sure struct sha512_state begins directly with the SHA512 + * 512-bit internal state, as this is what the asm functions expect. + */ BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); kernel_fpu_begin(); - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_xform); + sha512_base_do_update(desc, data, len, sha512_xform); kernel_fpu_end(); return 0; } static int sha512_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) + unsigned int len, u8 *out, sha512_block_fn *sha512_xform) { if (!irq_fpu_usable()) return crypto_sha512_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_xform); - sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform); + sha512_base_do_update(desc, data, len, sha512_xform); + sha512_base_do_finalize(desc, sha512_xform); kernel_fpu_end(); return sha512_base_finish(desc, out); @@ -146,8 +145,8 @@ static void unregister_sha512_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha512_transform_avx(u64 *digest, const char *data, - u64 rounds); +asmlinkage void sha512_transform_avx(struct sha512_state *state, + const u8 *data, int blocks); static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { @@ -229,8 +228,8 @@ static inline void unregister_sha512_avx(void) { } #endif #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) -asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, - u64 rounds); +asmlinkage void sha512_transform_rorx(struct sha512_state *state, + const u8 *data, int blocks); static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 389800344f6975f7381d4edc737c8aff09336ae8..ba5698dca9698592023a7baf429f5a8e5f4e46af 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -271,7 +271,7 @@ __visible
inline void syscall_return_slowpath(struct pt_regs *regs) } #ifdef CONFIG_X86_64 -__visible void do_syscall_64(struct pt_regs *regs) +__nocfi __visible void do_syscall_64(struct pt_regs *regs) { struct thread_info *ti = current_thread_info(); unsigned long nr = regs->orig_ax; @@ -305,7 +305,7 @@ __visible void do_syscall_64(struct pt_regs *regs) * extremely hot in workloads that use it, and it's usually called from * do_fast_syscall_32, so forcibly inline it to improve performance. */ -static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) +static __nocfi __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) { struct thread_info *ti = current_thread_info(); unsigned int nr = (unsigned int)regs->orig_ax; diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 49adabd94f883faf7f9c93b9cbd1121e4c1243d4..c19974a493784e1cc40f75452a96179f1010625a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1057,6 +1057,7 @@ ENTRY(int3) END(int3) ENTRY(general_protection) + ASM_CLAC pushl $do_general_protection jmp common_exception END(general_protection) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index ab7f730cf7f22172ab99cda4219c1e598245121f..d64dd8cbee3b845e8d98fc79cc3e3f4eaec6fe00 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,7 +3,6 @@ # Building vDSO images for x86. # -KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y @@ -72,7 +71,7 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(DISABLE_LTO) ifdef CONFIG_RETPOLINE ifneq ($(RETPOLINE_VDSO_CFLAGS),) @@ -150,6 +149,8 @@ KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(LTO_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(CFI_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 42d4c89f990ed1d752b24c641329671587313ba0..ddff0ca6f50981bfc53cfae86d81e9d2200a92a0 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index defb536aebce2144db706672e62d3e5ee82fdbe3..c3ec535fd36b84d4e92c1ccea8d6d3ce64073ce9 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -245,6 +245,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, diff --git 
a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index baa7e36073f907b19a983459aa15e66a682b9f95..604a8558752d1d46dea98b9cf3238e82c8ecaeb6 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -193,20 +193,18 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * interrupts can be directed to a single target core; however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; /* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -314,6 +312,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -324,6 +323,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3310f9f6c3e14f5aa6193a4568103a553e9f62c1..550b7814ef92b12b26a7f3e86ee8a4179285abda 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1368,6 +1368,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) old = ((s64)(prev_raw_count << shift) >> shift); local64_add(new - old + count * period, &event->count); + local64_set(&hwc->period_left, -new); + perf_event_update_userpage(event); return 0; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b4bef819d5d5365039d0039506d34503bee774ff..157cfaf1064c9e874185a0ee154f2e975c5835b3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -205,7 +205,7 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ - +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d0e17813a9b0a558b36c3cac76fdded9924b6601..9529fe69e1d927bb43199d0e8d1a57b4c2617f89 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1006,7 +1006,7 @@ struct
kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); @@ -1032,7 +1032,7 @@ struct kvm_x86_ops { bool (*mpx_supported)(void); bool (*xsaves_supported)(void); - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 209492849566cefc56305cd8e976fbf4a25d7527..5c524d4f71cd7b2a99b7037f7962e334f227a94c 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6a4b1a54ff479cf2ff018470351de45890625582..98a337e3835d68758297115cc6d9cbba8e5b657a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -588,12 +588,15 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return __pmd(val); } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 85f8279c885ac36290b3ec57ebeb11848e3be596..e6c870c240657ed6e53c41f408699d47381c9760 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 5c019d23d06b1168da0ea965d7c35bebd4d02307..4c85f6db1e3df634adad2033e98899f9cb8bbb6b 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -6,7 +6,7 @@ #include extern char __brk_base[], __brk_limit[]; -extern struct exception_table_entry __stop___ex_table[]; +extern char __cfi_jt_start[], __cfi_jt_end[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6dda3595acf8571b750fb58236e889a5e991396f..40d7072be70984fae28a7881ff1f9fd397e9848a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1738,7 +1738,7 @@ int __acpi_acquire_global_lock(unsigned int *lock) new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); val = cmpxchg(lock, old, new); } while 
(unlikely (val != old)); - return (new < 3) ? -1 : 0; + return ((new & 0x3) < 3) ? -1 : 0; } int __acpi_release_global_lock(unsigned int *lock) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index dde437f5d14ff828dccff19275033ff2330acc60..596e7640d895a084c5b961503b8e3325f31cdca8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -133,7 +133,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); + retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx, + false); if (retval == 0) { /* Use the hint in CST */ percpu_entry->states[cx->index].eax = cx->address; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3d805e8b373965c048ef4e52c2ec523e26d6e149..7b4141889919083c9d4800c7d08b97708339f2d1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -360,7 +360,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 3f731d7f04bf65f2726bd7b0349a12c3b995d88c..07742b69d914cd27aa2d0e5396a796fe8f6009fa 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -135,6 +135,40 @@ struct rdt_resource rdt_resources_all[] = { .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_L2DATA] = + { + .rid = RDT_RESOURCE_L2DATA, + .name = "L2DATA", + .domains = domain_init(RDT_RESOURCE_L2DATA), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, + .cbm_idx_offset = 0, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, + [RDT_RESOURCE_L2CODE] = + { + .rid = RDT_RESOURCE_L2CODE, + .name = "L2CODE", + .domains = domain_init(RDT_RESOURCE_L2CODE), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, + .cbm_idx_offset = 1, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, [RDT_RESOURCE_MBA] = { .rid = RDT_RESOURCE_MBA, @@ -259,15 +293,15 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->alloc_enabled = true; } -static void rdt_get_cdp_l3_config(int type) +static void rdt_get_cdp_config(int level, int type) { - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_l = &rdt_resources_all[level]; struct rdt_resource *r = &rdt_resources_all[type]; - r->num_closid = r_l3->num_closid / 2; - r->cache.cbm_len = r_l3->cache.cbm_len; - r->default_ctrl = r_l3->default_ctrl; - r->cache.shareable_bits = r_l3->cache.shareable_bits; + r->num_closid = r_l->num_closid / 2; + r->cache.cbm_len = r_l->cache.cbm_len; + r->default_ctrl = r_l->default_ctrl; + r->cache.shareable_bits = r_l->cache.shareable_bits; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; /* @@ -277,6 +311,18 @@ static void rdt_get_cdp_l3_config(int type) r->alloc_enabled = false; } +static void rdt_get_cdp_l3_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA); + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE); +} + +static void 
rdt_get_cdp_l2_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA); + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE); +} + static int get_cache_id(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); @@ -486,6 +532,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); + rdt_domain_reconfigure_cdp(r); + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { kfree(d); return; } @@ -729,15 +777,15 @@ static __init bool get_rdt_alloc_resources(void) if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); - if (rdt_cpu_has(X86_FEATURE_CDP_L3)) { - rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA); - rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE); - } + if (rdt_cpu_has(X86_FEATURE_CDP_L3)) + rdt_get_cdp_l3_config(); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { /* CPUID 0x10.2 fields are the same format as 0x10.1 */ rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); + if (rdt_cpu_has(X86_FEATURE_CDP_L2)) + rdt_get_cdp_l2_config(); ret = true; } diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index a43a72d8e88e45ef76a2dee5e5a4abbd0ecab308..b43a786ec15f24e1111980ae9d204fdbad9c4e6e 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -7,12 +7,15 @@ #include #define IA32_L3_QOS_CFG 0xc81 +#define IA32_L2_QOS_CFG 0xc82 #define IA32_L3_CBM_BASE 0xc90 #define IA32_L2_CBM_BASE 0xd10 #define IA32_MBA_THRTL_BASE 0xd50 #define L3_QOS_CDP_ENABLE 0x01ULL +#define L2_QOS_CDP_ENABLE 0x01ULL + /* * Event IDs are used to program IA32_QM_EVTSEL before reading event * counter from IA32_QM_CTR @@ -354,6 +357,8 @@ enum { RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE, RDT_RESOURCE_L2, + RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE, RDT_RESOURCE_MBA, /* Must be the last */ @@ -437,5 +442,6 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); +void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_INTEL_RDT_H */ diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 0ec30b2384c052c8299d3d72acf395618f2706f8..60c63b23e3baa8351b933c21fde99dd6ad15d450 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -922,6 +922,7 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, kernfs_remove(kn); return ret; } + static void l3_qos_cfg_update(void *arg) { bool *enable = arg; @@ -929,8 +930,17 @@ static void l3_qos_cfg_update(void *arg) wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); } -static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) +static void l2_qos_cfg_update(void *arg) +{ + bool *enable = arg; + + wrmsrl(IA32_L2_QOS_CFG, *enable ?
L2_QOS_CDP_ENABLE : 0ULL); +} + +static int set_cache_qos_cfg(int level, bool enable) { + void (*update)(void *arg); + struct rdt_resource *r_l; cpumask_var_t cpu_mask; struct rdt_domain *d; int cpu; @@ -938,16 +948,24 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - list_for_each_entry(d, &r->domains, list) { + if (level == RDT_RESOURCE_L3) + update = l3_qos_cfg_update; + else if (level == RDT_RESOURCE_L2) + update = l2_qos_cfg_update; + else + return -EINVAL; + + r_l = &rdt_resources_all[level]; + list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ if (cpumask_test_cpu(cpu, cpu_mask)) - l3_qos_cfg_update(&enable); + update(&enable); /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ - smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); + smp_call_function_many(cpu_mask, update, &enable, 1); put_cpu(); free_cpumask_var(cpu_mask); @@ -955,52 +973,99 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) return 0; } -static int cdp_enable(void) +static int cdp_enable(int level, int data_type, int code_type) { - struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; + struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; + struct rdt_resource *r_l = &rdt_resources_all[level]; int ret; - if (!r_l3->alloc_capable || !r_l3data->alloc_capable || - !r_l3code->alloc_capable) + if (!r_l->alloc_capable || !r_ldata->alloc_capable || + !r_lcode->alloc_capable) return -EINVAL; - ret = set_l3_qos_cfg(r_l3, true); + ret = set_cache_qos_cfg(level, true); if (!ret) { - r_l3->alloc_enabled = false; - r_l3data->alloc_enabled = true; - r_l3code->alloc_enabled = true; + r_l->alloc_enabled = false; + r_ldata->alloc_enabled = true; + r_lcode->alloc_enabled = true; } return ret; } -static void cdp_disable(void) +static int cdpl3_enable(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, + RDT_RESOURCE_L3CODE); +} + +static int cdpl2_enable(void) +{ + return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE); +} + +static void cdp_disable(int level, int data_type, int code_type) +{ + struct rdt_resource *r = &rdt_resources_all[level]; r->alloc_enabled = r->alloc_capable; - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) { - rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false; - rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false; - set_l3_qos_cfg(r, false); + if (rdt_resources_all[data_type].alloc_enabled) { + rdt_resources_all[data_type].alloc_enabled = false; + rdt_resources_all[code_type].alloc_enabled = false; + set_cache_qos_cfg(level, false); } } +static void cdpl3_disable(void) +{ + cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); +} + +static void cdpl2_disable(void) +{ + cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); +} + +static void cdp_disable_all(void) +{ + if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) + cdpl3_disable(); + if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) + cdpl2_disable(); 
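With CDP the data and code masks for a CLOSID are interleaved in the same CBM MSR range, which is what the cbm_idx_mult = 2 and cbm_idx_offset = 0/1 fields in the L2DATA/L2CODE (and L3DATA/L3CODE) resource definitions encode. A small sketch of the index math, assuming it mirrors the kernel's cbm_idx() helper:

	#include <stdio.h>

	/* MSR slot for a CLOSID, from cbm_idx_mult and cbm_idx_offset. */
	static unsigned int cbm_idx(unsigned int mult, unsigned int offset,
				    unsigned int closid)
	{
		return closid * mult + offset;
	}

	int main(void)
	{
		unsigned int closid;

		/* CDP: data masks land in even slots, code masks in odd ones. */
		for (closid = 0; closid < 4; closid++)
			printf("closid %u: data +%u, code +%u\n", closid,
			       cbm_idx(2, 0, closid), cbm_idx(2, 1, closid));
		return 0;
	}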
+} + static int parse_rdtgroupfs_options(char *data) { char *token, *o = data; int ret = 0; while ((token = strsep(&o, ",")) != NULL) { - if (!*token) - return -EINVAL; + if (!*token) { + ret = -EINVAL; + goto out; + } - if (!strcmp(token, "cdp")) - ret = cdp_enable(); + if (!strcmp(token, "cdp")) { + ret = cdpl3_enable(); + if (ret) + goto out; + } else if (!strcmp(token, "cdpl2")) { + ret = cdpl2_enable(); + if (ret) + goto out; + } else { + ret = -EINVAL; + goto out; + } } + return 0; + +out: + pr_err("Invalid mount option \"%s\"\n", token); + return ret; } @@ -1155,7 +1220,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, out_info: kernfs_remove(kn_info); out_cdp: - cdp_disable(); + cdp_disable_all(); out: mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); @@ -1322,7 +1387,7 @@ static void rdt_kill_sb(struct super_block *sb) /* Put everything back to default values. */ for_each_alloc_enabled_rdt_resource(r) reset_all_ctrls(r); - cdp_disable(); + cdp_disable_all(); rmdir_all_sub(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); @@ -1692,6 +1757,19 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, return ret; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + if (!r->alloc_capable) + return; + + if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) + l2_qos_cfg_update(&r->alloc_enabled); + + if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) + l3_qos_cfg_update(&r->alloc_enabled); +} + /* * We allow creating mon groups only within a directory called "mon_groups" * which is present in every ctrl_mon group. Check if this is a valid @@ -1840,7 +1918,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask); else if (rdtgrp->type == RDTMON_GROUP && is_mon_groups(parent_kn, kn->name)) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index f12141ba9a76d59963d2f87e6a0177e86742237c..e57b59762f9f5f7b33126cea9de96c4dfd8eaf02 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -570,9 +568,15 @@ static void do_inject(void) static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can handle non-uniform values.
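The bank count sits in the low byte of MSR_IA32_MCG_CAP (MCG_BANKCNT_MASK), so validating an injection target against the remote CPU's own capability is just a mask and compare; roughly:

	#include <stdint.h>
	#include <stdio.h>

	#define MCG_BANKCNT_MASK	0xff	/* bits 7:0 of MSR_IA32_MCG_CAP */

	/* Check an injection bank against a CPU's own MCG_CAP value. */
	static int bank_valid(uint64_t mcg_cap, uint64_t bank)
	{
		uint8_t n_banks = mcg_cap & MCG_BANKCNT_MASK;

		return bank < n_banks;
	}

	int main(void)
	{
		uint64_t cap = 0x0f000c16;	/* hypothetical MCG_CAP: 22 banks */

		printf("bank 10: %d\n", bank_valid(cap, 10));	/* 1 */
		printf("bank 30: %d\n", bank_valid(cap, 30));	/* 0 */
		return 0;
	}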
*/ + rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); + n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } @@ -665,10 +669,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0b0e44f853931a86b4fa6ad7e0d4f264e945f68b..95c09db1bba214680433eac27543e1a955ad48a1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1499,13 +1499,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { int i; - u8 num_banks = mca_cfg.banks; - mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < num_banks; i++) { + for (i = 0; i < MAX_NR_BANKS; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1519,28 +1518,19 @@ static int __mcheck_cpu_mce_banks_init(void) */ static int __mcheck_cpu_cap_init(void) { - unsigned b; u64 cap; + u8 b; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!mca_cfg.banks) - pr_info("CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - pr_warn("Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); + if (WARN_ON_ONCE(b > MAX_NR_BANKS)) b = MAX_NR_BANKS; - } - /* Don't support asymmetric configurations today */ - WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); - mca_cfg.banks = b; + mca_cfg.banks = max(mca_cfg.banks, b); if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); - if (err) return err; } @@ -2470,6 +2460,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { + pr_info("Using %d MCE banks\n", mca_cfg.banks); + if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a8f47697276b60b676a6ca969cf607b509568abd..bbe94b682119ebb5913ab2ee5296dc9eb5337007 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1116,9 +1116,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1151,8 +1154,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1196,16 +1200,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks 
= b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1213,7 +1213,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1298,8 +1298,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1310,9 +1308,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); @@ -1321,8 +1323,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1332,13 +1338,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d05be307d081a67c12eef433b5c832fcfcffa1a7..1d87b85150dbd5bb527a92c396823ecab7cbf61d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -489,17 +489,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? 
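MSR_PPIN_CTL uses bit 0 as a lock and bit 1 as the enable, so the value 1 means locked in disabled mode and, after the enable attempt, only bit 1 matters. A compact model of the reworked intel_ppin_init() decision (the bit macro names are illustrative, and the wrmsrl() is modeled as an unconditional store):

	#include <stdbool.h>
	#include <stdint.h>

	#define PPIN_CTL_LOCK	(1UL << 0)	/* illustrative name */
	#define PPIN_CTL_ENABLE	(1UL << 1)	/* illustrative name */

	/* Returns true if PPIN can be reported. */
	static bool ppin_usable(uint64_t val)
	{
		if ((val & (PPIN_CTL_LOCK | PPIN_CTL_ENABLE)) == PPIN_CTL_LOCK)
			return false;		/* locked in disabled mode */

		if (!(val & PPIN_CTL_ENABLE))
			val |= PPIN_CTL_ENABLE;	/* the enable attempt */

		return val & PPIN_CTL_ENABLE;	/* is the enable bit set? */
	}

	int main(void)
	{
		return !ppin_usable(1) && ppin_usable(0) && ppin_usable(2)
			? 0 : 1;
	}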
*/ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c0201b11e9e2a36439853975318c77521f680082..a6b323a3a63047cb54643052b92f33fbb0f17979 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -178,8 +178,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index ed7ce5184a772d2cddb5124ed6f005e445983b08..0b9c7150cb23f00ac777232e226e7a299672e9b9 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, + { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d805202c63cdccfeaa303f5a5e14cd38ee6341d7..917840ed5fe47c585d13ab7081a2093cf381b28c 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -45,7 +45,7 @@ #define VMWARE_PORT_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx)" : \ + __asm__("inl (%%dx), %%eax" : \ "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ "0"(VMWARE_HYPERVISOR_MAGIC), \ "1"(VMWARE_PORT_CMD_##cmd), \ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ccc2b9d2956aa4a80e3a0cb3f82346b84df190b4..387340b1f6dbf9ecc8b26c4ef329e5eb30a3d196 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -36,6 +36,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) { mutex_lock(&text_mutex); set_kernel_text_rw(); @@ -44,6 +45,7 @@ int ftrace_arch_code_modify_prepare(void) } int ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) { set_all_modules_text_ro(); set_kernel_text_ro(); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f58336af095c9d3050e85c5dea79a164b275e420..fcd4d7b7d33020357d975284289d108a6802db4e 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -201,6 +201,10 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, goto overflow; #endif break; + case R_X86_64_8: + if (!strncmp(strtab + sym->st_name, "__typeid__", 10)) + break; + /* fallthrough */ default: pr_err("%s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 85195d447a922785857db0caacc73d6bc9b9490c..f3215346e47fd8d822191d9ff879896e1f5e7a4f 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * 
mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length); /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 2384a2ae5ec3e9db7e660d80b45dfc4287cb9139..2e994fff9164b6fc9d2dbc5cb93ca1d1fe0bf6ea 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -132,15 +132,20 @@ SECTIONS __indirect_thunk_end = .; #endif - /* End of text section */ - _etext = .; +#ifdef CONFIG_CFI_CLANG + . = ALIGN(PAGE_SIZE); + __cfi_jt_start = .; + *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) + __cfi_jt_end = .; +#endif } :text = 0x9090 NOTES :text :note EXCEPTION_TABLE(16) :text = 0x9090 - /* .text should occupy whole number of pages */ + /* End of text section, which should occupy whole number of pages */ + _etext = .; . = ALIGN(PAGE_SIZE); X64_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) @@ -352,7 +357,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) - *(.bss) + *(BSS_MAIN) . = ALIGN(PAGE_SIZE); __bss_stop = .; } @@ -414,3 +419,7 @@ INIT_PER_CPU(irq_stack_union); "kexec control code size is too big"); #endif +#ifdef CONFIG_CFI_CLANG +. = ASSERT((__cfi_jt_end - __cfi_jt_start > 0), + "CFI jump table is empty"); +#endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 38959b173a4232d61bdfeff5621b588a69919166..6ec1cfd0addd89c0b2919a20153744c4ebabc2d3 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, { switch (func) { case 0: - entry->eax = 1; /* only one leaf currently */ + entry->eax = 7; ++*nent; break; case 1: entry->ecx = F(MOVBE); ++*nent; break; + case 7: + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (index == 0) + entry->ecx = F(RDPID); + ++*nent; default: break; } @@ -489,7 +494,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->edx |= F(SPEC_CTRL); if (boot_cpu_has(X86_FEATURE_STIBP)) entry->edx |= F(INTEL_STIBP); - if (boot_cpu_has(X86_FEATURE_SSBD)) + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->edx |= F(SPEC_CTRL_SSBD); /* * We emulate ARCH_CAPABILITIES in software even diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 041b9b05fae19a418ef365724a2d0889ca821699..4cc8a4a6f1d00946ed2379f3363fcd8ca6fb5316 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3539,6 +3539,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_rdpid(struct x86_emulate_ctxt *ctxt) +{ + u64 tsc_aux = 0; + + if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + return emulate_gp(ctxt, 0); + ctxt->dst.val = tsc_aux; + return X86EMUL_CONTINUE; +} + static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { u64 tsc = 0; @@ -4431,10 +4441,20 @@ static const struct opcode group8[] = { F(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; +/* + * The "memory" destination is actually always a register, since we come + * from the register case of group9. 
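What em_rdpid() returns is simply IA32_TSC_AUX; userspace can observe the same value through RDTSCP, since Linux initializes TSC_AUX with the CPU number in the low 12 bits and the node above that. For example:

	#include <stdint.h>
	#include <stdio.h>
	#include <x86intrin.h>

	int main(void)
	{
		unsigned int aux;
		uint64_t tsc = __rdtscp(&aux);	/* aux := IA32_TSC_AUX */

		printf("tsc=%llu cpu=%u node=%u\n", (unsigned long long)tsc,
		       aux & 0xfff, aux >> 12);
		return 0;
	}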
+ */ +static const struct gprefix pfx_0f_c7_7 = { + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), +}; + + static const struct group_dual group9 = { { N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { - N, N, N, N, N, N, N, N, + N, N, N, N, N, N, N, + GP(0, &pfx_0f_c7_7), } }; static const struct opcode group11[] = { @@ -5042,6 +5062,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->fetch.ptr = ctxt->fetch.data; ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; + ctxt->intercept = x86_intercept_none; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 3cc3b2d130a0c277e6b4cb6f59aa2a391d2253f6..4d000aea05e09bd0d511796f757f4a485194d05e 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -427,7 +427,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8715711f27559693e366112f11b8bb8356095507..537c36b55b5d73e115ec392857a7d25d6db81a7d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -566,9 +566,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) { u8 val; - if (pv_eoi_get_user(vcpu, &val) < 0) + if (pv_eoi_get_user(vcpu, &val) < 0) { apic_debug("Can't read EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); + return false; + } return val & 0x1; } @@ -993,11 +995,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (vcpu->arch.apicv_active) - kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - else { + if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 52edb8cf1c400565556c325e55c5b2bdc7d73ac0..d636213864185958833505112e8157f79b144c27 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1088,6 +1088,47 @@ static int avic_ga_log_notifier(u32 ga_tag) return 0; } +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. + */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. 
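rsvd_bits() just builds a contiguous run of set bits; assuming it matches the kernel helper of that name, the whole mask computation in svm_adjust_mmio_mask() can be modeled as:

	#include <stdint.h>
	#include <stdio.h>

	/* Set bits s..e inclusive, as in the kernel's rsvd_bits() helper. */
	static uint64_t rsvd_bits(int s, int e)
	{
		return ((1ULL << (e - s + 1)) - 1) << s;
	}

	int main(void)
	{
		int enc_bit = 47;	/* hypothetical C-bit position */
		int mask_bit = 48;	/* boot_cpu_data.x86_phys_bits */
		uint64_t mask;

		if (enc_bit == mask_bit)
			mask_bit++;

		/* bit 0 stands in for PT_PRESENT_MASK */
		mask = mask_bit < 52 ? rsvd_bits(mask_bit, 51) | 1 : 0;

		printf("MMIO SPTE mask: 0x%016llx\n", (unsigned long long)mask);
		return 0;
	}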
+ * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1123,6 +1164,8 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -4631,8 +4674,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -4641,6 +4687,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 809d1b031fd9e5a1ad2cdb655ecd621c0d125915..c139dedec12bc028ec34adbab6097cd79c51b37f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1674,43 +1674,15 @@ static void vmcs_load(struct vmcs *vmcs) } #ifdef CONFIG_KEXEC_CORE -/* - * This bitmap is used to indicate whether the vmclear - * operation is enabled on all cpus. All disabled by - * default. - */ -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; - -static inline void crash_enable_local_vmclear(int cpu) -{ - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline void crash_disable_local_vmclear(int cpu) -{ - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline int crash_local_vmclear_enabled(int cpu) -{ - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - static void crash_vmclear_local_loaded_vmcss(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; - if (!crash_local_vmclear_enabled(cpu)) - return; - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), loaded_vmcss_on_cpu_link) vmcs_clear(v->vmcs); } -#else -static inline void crash_enable_local_vmclear(int cpu) { } -static inline void crash_disable_local_vmclear(int cpu) { } #endif /* CONFIG_KEXEC_CORE */ static void __loaded_vmcs_clear(void *arg) @@ -1722,19 +1694,24 @@ static void __loaded_vmcs_clear(void *arg) return; /* vcpu migration can race with cpu offline */ if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) per_cpu(current_vmcs, cpu) = NULL; - crash_disable_local_vmclear(cpu); + + vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); + list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); /* - * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link - * is before setting loaded_vmcs->vcpu to -1 which is done in - * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist - * then adds the vmcs into percpu list before it is deleted. + * Ensure all writes to loaded_vmcs, including deleting it from its + * current percpu list, complete before setting loaded_vmcs->vcpu to + * -1, otherwise a different cpu can see vcpu == -1 first and add + * loaded_vmcs to its percpu list before it's deleted from this cpu's + * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs(). 
*/ smp_wmb(); - loaded_vmcs_init(loaded_vmcs); - crash_enable_local_vmclear(cpu); + loaded_vmcs->cpu = -1; + loaded_vmcs->launched = 0; } static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) @@ -2497,18 +2474,17 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!already_loaded) { loaded_vmcs_clear(vmx->loaded_vmcs); local_irq_disable(); - crash_disable_local_vmclear(cpu); /* - * Read loaded_vmcs->cpu should be before fetching - * loaded_vmcs->loaded_vmcss_on_cpu_link. - * See the comments in __loaded_vmcs_clear(). + * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to + * this cpu's percpu list, otherwise it may not yet be deleted + * from its previous cpu's percpu list. Pairs with the + * smp_wmb() in __loaded_vmcs_clear(). */ smp_rmb(); list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, &per_cpu(loaded_vmcss_on_cpu, cpu)); - crash_enable_local_vmclear(cpu); local_irq_enable(); } @@ -3800,21 +3776,6 @@ static int hardware_enable(void) if (cr4_read_shadow() & X86_CR4_VMXE) return -EBUSY; - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - - /* - * Now we can enable the vmclear operation in kdump - * since the loaded_vmcss_on_cpu list on this cpu - * has been initialized. - * - * Though the cpu is not in VMX operation now, there - * is no problem to enable the vmclear operation - * for the loaded_vmcss_on_cpu list is empty! - */ - crash_enable_local_vmclear(cpu); - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); test_bits = FEATURE_CONTROL_LOCKED; @@ -4597,6 +4558,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; @@ -4988,6 +4952,26 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) (ss.selector & SEGMENT_RPL_MASK)); } +static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, + unsigned int port, int size); +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + unsigned short port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + return nested_vmx_check_io_bitmaps(vcpu, port, size); +} + /* * Check if guest state is valid. Returns true if valid, false if * not. @@ -5518,24 +5502,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, * 2. If target vcpu isn't running(root mode), kick it to pick up the * interrupt from PIR in next vmentry. */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { struct vcpu_vmx *vmx = to_vmx(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); if (!r) - return; + return 0; + + if (!vcpu->arch.apicv_active) + return -1; if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; + return 0; /* If a previous notification has sent the IPI, nothing to do.
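Returning an int here lets the common lapic code fall back to setting the IRR and kicking the vcpu whenever posting is not possible (see the lapic.c and svm.c hunks above). The fast path itself is the usual lock-free "publish the vector, then claim the single outstanding notification" pattern; a sketch with C11 atomics, names illustrative:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	static _Atomic uint64_t pir[4];	/* 256-bit request bitmap */
	static atomic_bool on;		/* outstanding-notification flag */

	/* Returns true if this caller must send the notification IPI. */
	static bool post_vector(unsigned int vector)
	{
		uint64_t bit = 1ULL << (vector & 63);

		/* like pi_test_and_set_pir(): publish the vector */
		if (atomic_fetch_or(&pir[(vector & 255) / 64], bit) & bit)
			return false;	/* vector already pending */

		/* like pi_test_and_set_on(): one notification in flight */
		if (atomic_exchange(&on, true))
			return false;	/* previous IPI will pick it up */

		return true;
	}

	int main(void)
	{
		return post_vector(32) ? 0 : 1;	/* first post must notify */
	}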
*/ if (pi_test_and_set_on(&vmx->pi_desc)) - return; + return 0; if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); + + return 0; } /* @@ -6170,8 +6159,13 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - return (!to_vmx(vcpu)->nested.nested_run_pending && - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + if (to_vmx(vcpu)->nested.nested_run_pending) + return false; + + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + return true; + + return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); } @@ -6256,7 +6250,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, */ static void kvm_machine_check(void) { -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) +#if defined(CONFIG_X86_MCE) struct pt_regs regs = { .cs = 3, /* Fake ring 3 no matter what the guest ran on */ .flags = X86_EFLAGS_IF, @@ -8518,23 +8512,17 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +/* + * Return true if an IO instruction with the specified port and size should cause + * a VM-exit into L1. + */ +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size) { - unsigned long exit_qualification; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); gpa_t bitmap, last_bitmap; - unsigned int port; - int size; u8 b; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - last_bitmap = (gpa_t)-1; b = -1; @@ -11637,7 +11625,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, } } -static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) +static int vmx_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; @@ -11675,8 +11663,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } - if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && - nested_exit_on_intr(vcpu)) { + if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) { if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); @@ -12232,17 +12219,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - /* - * TODO: SDM says that with acknowledge interrupt on - * exit, bit 31 of the VM-exit interrupt information - * (valid interrupt) is always set to 1 on - * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't - * need kvm_cpu_has_interrupt(). See the commit - * message for details. 
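In the acknowledge-interrupt-on-exit hunk just below, the acked vector is encoded into the VM-exit interruption-information field: vector in bits 7:0, type in bits 10:8 (0 = external interrupt), and the valid bit at 31. The field composition is simply:

	#include <stdint.h>

	#define INTR_TYPE_EXT_INTR	(0U << 8)	/* external interrupt */
	#define INTR_INFO_VALID_MASK	(1U << 31)

	/* Encode an acked external interrupt for vmcs12->vm_exit_intr_info. */
	static uint32_t exit_intr_info(uint8_t vector)
	{
		return vector | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK;
	}

	int main(void)
	{
		return exit_intr_info(0x20) == 0x80000020U ? 0 : 1;
	}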
- */ - if (nested_exit_intr_ack_set(vcpu) && - exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && - kvm_cpu_has_interrupt(vcpu)) { + if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + nested_exit_intr_ack_set(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); vmcs12->vm_exit_intr_info = irq | @@ -12318,11 +12296,86 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, to_vmx(vcpu)->nested.sync_shadow_vmcs = true; } +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned short port; + bool intercept; + int size; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + port = info->src_val; + size = info->dst_bytes; + } else { + port = info->dst_val; + size = info->src_bytes; + } + + /* + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction + * VM-exits depend on the 'unconditional IO exiting' VM-execution + * control. + * + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. + */ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + intercept = nested_cpu_has(vmcs12, + CPU_BASED_UNCOND_IO_EXITING); + else + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return X86EMUL_CONTINUE; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + + switch (info->intercept) { + /* + * RDPID causes #UD if disabled through secondary execution controls. + * Because it is marked as EmulateOnUD, we need to intercept it here. + */ + case x86_intercept_rdtscp: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + break; + + case x86_intercept_in: + case x86_intercept_ins: + case x86_intercept_out: + case x86_intercept_outs: + return vmx_check_intercept_io(vcpu, info); + + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; + + /* TODO: check more intercepts... 
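nested_vmx_check_io_bitmaps(), used by vmx_check_intercept_io() above, consults the two 4 KiB IO bitmaps: bitmap A covers ports 0x0000-0x7fff, bitmap B covers 0x8000-0xffff, one bit per port, and a multi-byte access exits if any covered port's bit is set. A standalone model of that lookup, assuming local arrays where the real code reads the bitmaps from guest memory:

#include <stdint.h>
#include <stdio.h>

static uint8_t io_bitmap_a[4096], io_bitmap_b[4096];

static int io_access_intercepted(unsigned int port, int size)
{
	while (size-- > 0) {
		const uint8_t *bitmap;

		if (port < 0x8000)
			bitmap = io_bitmap_a;
		else if (port < 0x10000)
			bitmap = io_bitmap_b;
		else
			return 1;	/* beyond the bitmaps: always exit */
		if (bitmap[(port & 0x7fff) / 8] & (1u << (port & 7)))
			return 1;
		port++;
	}
	return 0;
}

int main(void)
{
	io_bitmap_a[0x60 / 8] |= 1u << (0x60 & 7);	/* intercept port 0x60 */
	printf("%d %d\n", io_access_intercepted(0x5f, 2),	/* covers 0x60 */
	       io_access_intercepted(0x61, 1));
	return 0;
}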
*/ + default: + break; + } + + return X86EMUL_UNHANDLEABLE; } #ifdef CONFIG_X86_64 @@ -12843,7 +12896,7 @@ module_exit(vmx_exit) static int __init vmx_init(void) { - int r; + int r, cpu; r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); @@ -12865,6 +12918,12 @@ static int __init vmx_init(void) } } + for_each_possible_cpu(cpu) { + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + } + #ifdef CONFIG_KEXEC_CORE rcu_assign_pointer(crash_vmclear_loaded_vmcss, crash_vmclear_local_loaded_vmcss); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c deleted file mode 100644 index 3791ce8d269e00ba4ebeec40880239716bcabf96..0000000000000000000000000000000000000000 --- a/arch/x86/kvm/vmx/vmx.c +++ /dev/null @@ -1,8033 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. - * - * Copyright (C) 2006 Qumranet, Inc. - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Authors: - * Avi Kivity - * Yaniv Kamay - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "capabilities.h" -#include "cpuid.h" -#include "evmcs.h" -#include "irq.h" -#include "kvm_cache_regs.h" -#include "lapic.h" -#include "mmu.h" -#include "nested.h" -#include "ops.h" -#include "pmu.h" -#include "trace.h" -#include "vmcs.h" -#include "vmcs12.h" -#include "vmx.h" -#include "x86.h" - -MODULE_AUTHOR("Qumranet"); -MODULE_LICENSE("GPL"); - -static const struct x86_cpu_id vmx_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_VMX), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); - -bool __read_mostly enable_vpid = 1; -module_param_named(vpid, enable_vpid, bool, 0444); - -static bool __read_mostly enable_vnmi = 1; -module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); - -bool __read_mostly flexpriority_enabled = 1; -module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); - -bool __read_mostly enable_ept = 1; -module_param_named(ept, enable_ept, bool, S_IRUGO); - -bool __read_mostly enable_unrestricted_guest = 1; -module_param_named(unrestricted_guest, - enable_unrestricted_guest, bool, S_IRUGO); - -bool __read_mostly enable_ept_ad_bits = 1; -module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); - -static bool __read_mostly emulate_invalid_guest_state = true; -module_param(emulate_invalid_guest_state, bool, S_IRUGO); - -static bool __read_mostly fasteoi = 1; -module_param(fasteoi, bool, S_IRUGO); - -static bool __read_mostly enable_apicv = 1; -module_param(enable_apicv, bool, S_IRUGO); - -/* - * If nested=1, nested virtualization is supported, i.e., guests may use - * VMX and be a hypervisor for its own guests. If nested=0, guests may not - * use VMX instructions. 
- */ -static bool __read_mostly nested = 1; -module_param(nested, bool, S_IRUGO); - -bool __read_mostly enable_pml = 1; -module_param_named(pml, enable_pml, bool, S_IRUGO); - -static bool __read_mostly dump_invalid_vmcs = 0; -module_param(dump_invalid_vmcs, bool, 0644); - -#define MSR_BITMAP_MODE_X2APIC 1 -#define MSR_BITMAP_MODE_X2APIC_APICV 2 - -#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL - -/* Guest_tsc -> host_tsc conversion requires 64-bit division. */ -static int __read_mostly cpu_preemption_timer_multi; -static bool __read_mostly enable_preemption_timer = 1; -#ifdef CONFIG_X86_64 -module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); -#endif - -#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE -#define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) -#define KVM_CR4_GUEST_OWNED_BITS \ - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) - -#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE -#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) -#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) - -#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) - -#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \ - RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \ - RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \ - RTIT_STATUS_BYTECNT)) - -#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \ - (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f) - -/* - * These 2 parameters are used to config the controls for Pause-Loop Exiting: - * ple_gap: upper bound on the amount of time between two successive - * executions of PAUSE in a loop. Also indicate if ple enabled. - * According to test, this time is usually smaller than 128 cycles. - * ple_window: upper bound on the amount of time a guest is allowed to execute - * in a PAUSE loop. Tests indicate that most spinlocks are held for - * less than 2^12 cycles - * Time is measured based on a counter that runs at the same rate as the TSC, - * refer SDM volume 3b section 21.6.13 & 22.1.3. - */ -static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; -module_param(ple_gap, uint, 0444); - -static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; -module_param(ple_window, uint, 0444); - -/* Default doubles per-vcpu window every exit. */ -static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; -module_param(ple_window_grow, uint, 0444); - -/* Default resets per-vcpu window every exit to ple_window. */ -static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; -module_param(ple_window_shrink, uint, 0444); - -/* Default is to compute the maximum so we can never overflow. 
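Per the comments above, the default policy grows the per-vCPU PLE window by doubling it on every PAUSE-loop exit and resets it to ple_window on shrink. A minimal sketch of that default policy with the clamp to ple_window_max; the constants are illustrative stand-ins for the module parameters, and the real helpers live elsewhere in KVM:

#include <stdio.h>

static const unsigned int ple_window = 4096;
static const unsigned int ple_window_max = 1u << 30;

/* Default grow policy: double per exit, clamped to the maximum. */
static unsigned int grow_ple_window(unsigned int w)
{
	return (w > ple_window_max / 2) ? ple_window_max : w * 2;
}

/* Default shrink policy: reset to the base window. */
static unsigned int shrink_ple_window(unsigned int w)
{
	(void)w;
	return ple_window;
}

int main(void)
{
	unsigned int w = ple_window;

	w = grow_ple_window(w);
	w = grow_ple_window(w);
	printf("grown=%u shrunk=%u\n", w, shrink_ple_window(w));
	return 0;
}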
*/ -static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; -module_param(ple_window_max, uint, 0444); - -/* Default is SYSTEM mode, 1 for host-guest mode */ -int __read_mostly pt_mode = PT_MODE_SYSTEM; -module_param(pt_mode, int, S_IRUGO); - -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); -static DEFINE_MUTEX(vmx_l1d_flush_mutex); - -/* Storage for pre module init parameter parsing */ -static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; - -static const struct { - const char *option; - bool for_parse; -} vmentry_l1d_param[] = { - [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, - [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, - [VMENTER_L1D_FLUSH_COND] = {"cond", true}, - [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, - [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, - [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, -}; - -#define L1D_CACHE_ORDER 4 -static void *vmx_l1d_flush_pages; - -static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) -{ - struct page *page; - unsigned int i; - - if (!boot_cpu_has_bug(X86_BUG_L1TF)) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - - if (!enable_ept) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; - return 0; - } - - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { - u64 msr; - - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - } - - /* If set to auto use the default l1tf mitigation method */ - if (l1tf == VMENTER_L1D_FLUSH_AUTO) { - switch (l1tf_mitigation) { - case L1TF_MITIGATION_OFF: - l1tf = VMENTER_L1D_FLUSH_NEVER; - break; - case L1TF_MITIGATION_FLUSH_NOWARN: - case L1TF_MITIGATION_FLUSH: - case L1TF_MITIGATION_FLUSH_NOSMT: - l1tf = VMENTER_L1D_FLUSH_COND; - break; - case L1TF_MITIGATION_FULL: - case L1TF_MITIGATION_FULL_FORCE: - l1tf = VMENTER_L1D_FLUSH_ALWAYS; - break; - } - } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { - l1tf = VMENTER_L1D_FLUSH_ALWAYS; - } - - if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && - !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { - /* - * This allocation for vmx_l1d_flush_pages is not tied to a VM - * lifetime and so should not be charged to a memcg. - */ - page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); - if (!page) - return -ENOMEM; - vmx_l1d_flush_pages = page_address(page); - - /* - * Initialize each page with a different pattern in - * order to protect against KSM in the nested - * virtualization case. 
- */ - for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { - memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, - PAGE_SIZE); - } - } - - l1tf_vmx_mitigation = l1tf; - - if (l1tf != VMENTER_L1D_FLUSH_NEVER) - static_branch_enable(&vmx_l1d_should_flush); - else - static_branch_disable(&vmx_l1d_should_flush); - - if (l1tf == VMENTER_L1D_FLUSH_COND) - static_branch_enable(&vmx_l1d_flush_cond); - else - static_branch_disable(&vmx_l1d_flush_cond); - return 0; -} - -static int vmentry_l1d_flush_parse(const char *s) -{ - unsigned int i; - - if (s) { - for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (vmentry_l1d_param[i].for_parse && - sysfs_streq(s, vmentry_l1d_param[i].option)) - return i; - } - } - return -EINVAL; -} - -static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) -{ - int l1tf, ret; - - l1tf = vmentry_l1d_flush_parse(s); - if (l1tf < 0) - return l1tf; - - if (!boot_cpu_has(X86_BUG_L1TF)) - return 0; - - /* - * Has vmx_init() run already? If not then this is the pre init - * parameter parsing. In that case just store the value and let - * vmx_init() do the proper setup after enable_ept has been - * established. - */ - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { - vmentry_l1d_flush_param = l1tf; - return 0; - } - - mutex_lock(&vmx_l1d_flush_mutex); - ret = vmx_setup_l1d_flush(l1tf); - mutex_unlock(&vmx_l1d_flush_mutex); - return ret; -} - -static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) -{ - if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) - return sprintf(s, "???\n"); - - return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); -} - -static const struct kernel_param_ops vmentry_l1d_flush_ops = { - .set = vmentry_l1d_flush_set, - .get = vmentry_l1d_flush_get, -}; -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); - -static bool guest_state_valid(struct kvm_vcpu *vcpu); -static u32 vmx_segment_access_rights(struct kvm_segment *var); -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type); - -void vmx_vmexit(void); - -#define vmx_insn_failed(fmt...) \ -do { \ - WARN_ONCE(1, fmt); \ - pr_warn_ratelimited(fmt); \ -} while (0) - -asmlinkage void vmread_error(unsigned long field, bool fault) -{ - if (fault) - kvm_spurious_fault(); - else - vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); -} - -noinline void vmwrite_error(unsigned long field, unsigned long value) -{ - vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", - field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); -} - -noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) -{ - vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); -} - -noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) -{ - vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); -} - -noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) -{ - vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", - ext, vpid, gva); -} - -noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) -{ - vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", - ext, eptp, gpa); -} - -static DEFINE_PER_CPU(struct vmcs *, vmxarea); -DEFINE_PER_CPU(struct vmcs *, current_vmcs); -/* - * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed - * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. 
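The vmentry_l1d_flush_ops above let the mitigation mode be changed at runtime, typically through /sys/module/kvm_intel/parameters/vmentry_l1d_flush. A standalone model of the parse step, assuming a sysfs_streq()-style comparison that tolerates the trailing newline a shell echo appends; only the user-settable options are listed:

#include <stdio.h>
#include <string.h>

static const char *const options[] = { "auto", "never", "cond", "always" };

static int streq_sysfs(const char *s, const char *opt)
{
	size_t n = strlen(opt);

	if (strncmp(s, opt, n) != 0)
		return 0;
	return s[n] == '\0' || (s[n] == '\n' && s[n + 1] == '\0');
}

static int l1d_flush_parse(const char *s)
{
	unsigned int i;

	for (i = 0; i < sizeof(options) / sizeof(options[0]); i++)
		if (streq_sysfs(s, options[i]))
			return (int)i;
	return -1;	/* -EINVAL in the real code */
}

int main(void)
{
	printf("%d %d %d\n", l1d_flush_parse("cond\n"),
	       l1d_flush_parse("always"), l1d_flush_parse("bogus"));
	return 0;
}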
- */ -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); - -/* - * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we - * can find which vCPU should be waken up. - */ -static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); - -static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); -static DEFINE_SPINLOCK(vmx_vpid_lock); - -struct vmcs_config vmcs_config; -struct vmx_capability vmx_capability; - -#define VMX_SEGMENT_FIELD(seg) \ - [VCPU_SREG_##seg] = { \ - .selector = GUEST_##seg##_SELECTOR, \ - .base = GUEST_##seg##_BASE, \ - .limit = GUEST_##seg##_LIMIT, \ - .ar_bytes = GUEST_##seg##_AR_BYTES, \ - } - -static const struct kvm_vmx_segment_field { - unsigned selector; - unsigned base; - unsigned limit; - unsigned ar_bytes; -} kvm_vmx_segment_fields[] = { - VMX_SEGMENT_FIELD(CS), - VMX_SEGMENT_FIELD(DS), - VMX_SEGMENT_FIELD(ES), - VMX_SEGMENT_FIELD(FS), - VMX_SEGMENT_FIELD(GS), - VMX_SEGMENT_FIELD(SS), - VMX_SEGMENT_FIELD(TR), - VMX_SEGMENT_FIELD(LDTR), -}; - -u64 host_efer; -static unsigned long host_idt_base; - -/* - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm - * will emulate SYSCALL in legacy mode if the vendor string in guest - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To - * support this emulation, IA32_STAR must always be included in - * vmx_msr_index[], even in i386 builds. - */ -const u32 vmx_msr_index[] = { -#ifdef CONFIG_X86_64 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, -#endif - MSR_EFER, MSR_TSC_AUX, MSR_STAR, - MSR_IA32_TSX_CTRL, -}; - -#if IS_ENABLED(CONFIG_HYPERV) -static bool __read_mostly enlightened_vmcs = true; -module_param(enlightened_vmcs, bool, 0444); - -/* check_ept_pointer() should be under protection of ept_pointer_lock. */ -static void check_ept_pointer_match(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - u64 tmp_eptp = INVALID_PAGE; - int i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!VALID_PAGE(tmp_eptp)) { - tmp_eptp = to_vmx(vcpu)->ept_pointer; - } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { - to_kvm_vmx(kvm)->ept_pointers_match - = EPT_POINTERS_MISMATCH; - return; - } - } - - to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; -} - -static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, - void *data) -{ - struct kvm_tlb_range *range = data; - - return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn, - range->pages); -} - -static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm, - struct kvm_vcpu *vcpu, struct kvm_tlb_range *range) -{ - u64 ept_pointer = to_vmx(vcpu)->ept_pointer; - - /* - * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address - * of the base of EPT PML4 table, strip off EPT configuration - * information. - */ - if (range) - return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK, - kvm_fill_hv_flush_list_func, (void *)range); - else - return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK); -} - -static int hv_remote_flush_tlb_with_range(struct kvm *kvm, - struct kvm_tlb_range *range) -{ - struct kvm_vcpu *vcpu; - int ret = 0, i; - - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); - - if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) - check_ept_pointer_match(kvm); - - if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { - kvm_for_each_vcpu(i, vcpu, kvm) { - /* If ept_pointer is invalid pointer, bypass flush request. 
*/ - if (VALID_PAGE(to_vmx(vcpu)->ept_pointer)) - ret |= __hv_remote_flush_tlb_with_range( - kvm, vcpu, range); - } - } else { - ret = __hv_remote_flush_tlb_with_range(kvm, - kvm_get_vcpu(kvm, 0), range); - } - - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); - return ret; -} -static int hv_remote_flush_tlb(struct kvm *kvm) -{ - return hv_remote_flush_tlb_with_range(kvm, NULL); -} - -static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) -{ - struct hv_enlightened_vmcs *evmcs; - struct hv_partition_assist_pg **p_hv_pa_pg = - &vcpu->kvm->arch.hyperv.hv_pa_pg; - /* - * Synthetic VM-Exit is not enabled in current code and so All - * evmcs in singe VM shares same assist page. - */ - if (!*p_hv_pa_pg) - *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); - - if (!*p_hv_pa_pg) - return -ENOMEM; - - evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; - - evmcs->partition_assist_page = - __pa(*p_hv_pa_pg); - evmcs->hv_vm_id = (unsigned long)vcpu->kvm; - evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; - - return 0; -} - -#endif /* IS_ENABLED(CONFIG_HYPERV) */ - -/* - * Comment's format: document - errata name - stepping - processor name. - * Refer from - * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp - */ -static u32 vmx_preemption_cpu_tfms[] = { -/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ -0x000206E6, -/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ -/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ -/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ -0x00020652, -/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ -0x00020655, -/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ -/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ -/* - * 320767.pdf - AAP86 - B1 - - * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile - */ -0x000106E5, -/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ -0x000106A0, -/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ -0x000106A1, -/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ -0x000106A4, - /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ - /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ - /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ -0x000106A5, - /* Xeon E3-1220 V2 */ -0x000306A8, -}; - -static inline bool cpu_has_broken_vmx_preemption_timer(void) -{ - u32 eax = cpuid_eax(0x00000001), i; - - /* Clear the reserved bits */ - eax &= ~(0x3U << 14 | 0xfU << 28); - for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) - if (eax == vmx_preemption_cpu_tfms[i]) - return true; - - return false; -} - -static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) -{ - return flexpriority_enabled && lapic_in_kernel(vcpu); -} - -static inline bool report_flexpriority(void) -{ - return flexpriority_enabled; -} - -static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) -{ - int i; - - for (i = 0; i < vmx->nmsrs; ++i) - if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) - return i; - return -1; -} - -struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) -{ - int i; - - i = __find_msr_index(vmx, msr); - if (i >= 0) - return &vmx->guest_msrs[i]; - return NULL; -} - -static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data) -{ - int ret = 0; - - u64 old_msr_data = msr->data; - msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) { - preempt_disable(); - ret = kvm_set_shared_msr(msr->index, msr->data, - msr->mask); - preempt_enable(); - if (ret) - 
msr->data = old_msr_data; - } - return ret; -} - -void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) -{ - vmcs_clear(loaded_vmcs->vmcs); - if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) - vmcs_clear(loaded_vmcs->shadow_vmcs); - loaded_vmcs->cpu = -1; - loaded_vmcs->launched = 0; -} - -#ifdef CONFIG_KEXEC_CORE -/* - * This bitmap is used to indicate whether the vmclear - * operation is enabled on all cpus. All disabled by - * default. - */ -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; - -static inline void crash_enable_local_vmclear(int cpu) -{ - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline void crash_disable_local_vmclear(int cpu) -{ - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline int crash_local_vmclear_enabled(int cpu) -{ - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static void crash_vmclear_local_loaded_vmcss(void) -{ - int cpu = raw_smp_processor_id(); - struct loaded_vmcs *v; - - if (!crash_local_vmclear_enabled(cpu)) - return; - - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) - vmcs_clear(v->vmcs); -} -#else -static inline void crash_enable_local_vmclear(int cpu) { } -static inline void crash_disable_local_vmclear(int cpu) { } -#endif /* CONFIG_KEXEC_CORE */ - -static void __loaded_vmcs_clear(void *arg) -{ - struct loaded_vmcs *loaded_vmcs = arg; - int cpu = raw_smp_processor_id(); - - if (loaded_vmcs->cpu != cpu) - return; /* vcpu migration can race with cpu offline */ - if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) - per_cpu(current_vmcs, cpu) = NULL; - crash_disable_local_vmclear(cpu); - list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); - - /* - * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link - * is before setting loaded_vmcs->vcpu to -1 which is done in - * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist - * then adds the vmcs into percpu list before it is deleted. 
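The crash_vmclear_enabled_bitmap machinery being deleted here gated the kdump VMCLEAR walk on a per-CPU flag; the patch can drop it because the percpu lists are now initialized for every possible CPU at module load and only mutated with interrupts disabled. A toy model of the old gating; NR_CPUS and the printout are purely illustrative:

#include <stdio.h>

static unsigned long crash_vmclear_enabled;	/* one bit per CPU */

static void crash_enable(int cpu)  { crash_vmclear_enabled |=  1UL << cpu; }
static void crash_disable(int cpu) { crash_vmclear_enabled &= ~(1UL << cpu); }
static int  crash_enabled(int cpu) { return !!(crash_vmclear_enabled & (1UL << cpu)); }

static void crash_vmclear_local(int cpu)
{
	if (!crash_enabled(cpu))
		return;		/* list may be mid-update: skip */
	printf("cpu%d: VMCLEAR all VMCSes on the local list\n", cpu);
}

int main(void)
{
	crash_enable(3);
	crash_vmclear_local(3);	/* walks the list */
	crash_disable(3);
	crash_vmclear_local(3);	/* skipped */
	return 0;
}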
- */ - smp_wmb(); - - loaded_vmcs_init(loaded_vmcs); - crash_enable_local_vmclear(cpu); -} - -void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) -{ - int cpu = loaded_vmcs->cpu; - - if (cpu != -1) - smp_call_function_single(cpu, - __loaded_vmcs_clear, loaded_vmcs, 1); -} - -static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, - unsigned field) -{ - bool ret; - u32 mask = 1 << (seg * SEG_FIELD_NR + field); - - if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { - kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); - vmx->segment_cache.bitmask = 0; - } - ret = vmx->segment_cache.bitmask & mask; - vmx->segment_cache.bitmask |= mask; - return ret; -} - -static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) -{ - u16 *p = &vmx->segment_cache.seg[seg].selector; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) - *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); - return *p; -} - -static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) -{ - ulong *p = &vmx->segment_cache.seg[seg].base; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) - *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); - return *p; -} - -static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) -{ - u32 *p = &vmx->segment_cache.seg[seg].limit; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) - *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); - return *p; -} - -static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) -{ - u32 *p = &vmx->segment_cache.seg[seg].ar; - - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) - *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); - return *p; -} - -void update_exception_bitmap(struct kvm_vcpu *vcpu) -{ - u32 eb; - - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << DB_VECTOR) | (1u << AC_VECTOR); - /* - * Guest access to VMware backdoor ports could legitimately - * trigger #GP because of TSS I/O permission bitmap. - * We intercept those #GP and allow access to them anyway - * as VMware does. - */ - if (enable_vmware_backdoor) - eb |= (1u << GP_VECTOR); - if ((vcpu->guest_debug & - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) - eb |= 1u << BP_VECTOR; - if (to_vmx(vcpu)->rmode.vm86_active) - eb = ~0; - if (enable_ept) - eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ - - /* When we are running a nested L2 guest and L1 specified for it a - * certain exception bitmap, we must trap the same exceptions and pass - * them to L1. When running L2, we will only handle the exceptions - * specified above if L1 did not want them. - */ - if (is_guest_mode(vcpu)) - eb |= get_vmcs12(vcpu)->exception_bitmap; - - vmcs_write32(EXCEPTION_BITMAP, eb); -} - -/* - * Check if MSR is intercepted for currently loaded MSR bitmap. 
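The segment cache accessors above all share one test-and-set idiom: a per-(segment, field) validity bit decides whether to reuse the cached value or issue a VMREAD. A standalone model with a stubbed VMREAD; the real code additionally clears the whole bitmask when VCPU_EXREG_SEGMENTS is marked stale:

#include <stdio.h>

#define SEG_FIELD_SEL	0
#define SEG_FIELD_NR	4

static unsigned int cache_bitmask;
static unsigned short cached_sel[8];

static unsigned short vmcs_read_sel_stub(unsigned seg)
{
	printf("VMREAD selector for seg %u\n", seg);
	return (unsigned short)(seg << 3);
}

static int cache_test_set(unsigned seg, unsigned field)
{
	unsigned int mask = 1u << (seg * SEG_FIELD_NR + field);
	int hit = cache_bitmask & mask;

	cache_bitmask |= mask;
	return hit;
}

static unsigned short read_seg_selector(unsigned seg)
{
	if (!cache_test_set(seg, SEG_FIELD_SEL))
		cached_sel[seg] = vmcs_read_sel_stub(seg);
	return cached_sel[seg];
}

int main(void)
{
	read_seg_selector(1);	/* miss: one VMREAD */
	read_seg_selector(1);	/* hit: served from the cache */
	return 0;
}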
- */ -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) -{ - unsigned long *msr_bitmap; - int f = sizeof(unsigned long); - - if (!cpu_has_vmx_msr_bitmap()) - return true; - - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; - - if (msr <= 0x1fff) { - return !!test_bit(msr, msr_bitmap + 0x800 / f); - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - return !!test_bit(msr, msr_bitmap + 0xc00 / f); - } - - return true; -} - -static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, - unsigned long entry, unsigned long exit) -{ - vm_entry_controls_clearbit(vmx, entry); - vm_exit_controls_clearbit(vmx, exit); -} - -int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) -{ - unsigned int i; - - for (i = 0; i < m->nr; ++i) { - if (m->val[i].index == msr) - return i; - } - return -ENOENT; -} - -static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) -{ - int i; - struct msr_autoload *m = &vmx->msr_autoload; - - switch (msr) { - case MSR_EFER: - if (cpu_has_load_ia32_efer()) { - clear_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_EFER, - VM_EXIT_LOAD_IA32_EFER); - return; - } - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if (cpu_has_load_perf_global_ctrl()) { - clear_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); - return; - } - break; - } - i = vmx_find_msr_index(&m->guest, msr); - if (i < 0) - goto skip_guest; - --m->guest.nr; - m->guest.val[i] = m->guest.val[m->guest.nr]; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); - -skip_guest: - i = vmx_find_msr_index(&m->host, msr); - if (i < 0) - return; - - --m->host.nr; - m->host.val[i] = m->host.val[m->host.nr]; - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); -} - -static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, - unsigned long entry, unsigned long exit, - unsigned long guest_val_vmcs, unsigned long host_val_vmcs, - u64 guest_val, u64 host_val) -{ - vmcs_write64(guest_val_vmcs, guest_val); - if (host_val_vmcs != HOST_IA32_EFER) - vmcs_write64(host_val_vmcs, host_val); - vm_entry_controls_setbit(vmx, entry); - vm_exit_controls_setbit(vmx, exit); -} - -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, - u64 guest_val, u64 host_val, bool entry_only) -{ - int i, j = 0; - struct msr_autoload *m = &vmx->msr_autoload; - - switch (msr) { - case MSR_EFER: - if (cpu_has_load_ia32_efer()) { - add_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_EFER, - VM_EXIT_LOAD_IA32_EFER, - GUEST_IA32_EFER, - HOST_IA32_EFER, - guest_val, host_val); - return; - } - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if (cpu_has_load_perf_global_ctrl()) { - add_atomic_switch_msr_special(vmx, - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, - GUEST_IA32_PERF_GLOBAL_CTRL, - HOST_IA32_PERF_GLOBAL_CTRL, - guest_val, host_val); - return; - } - break; - case MSR_IA32_PEBS_ENABLE: - /* PEBS needs a quiescent period after being disabled (to write - * a record). Disabling PEBS through VMX MSR swapping doesn't - * provide that period, so a CPU could write host's record into - * guest's memory. - */ - wrmsrl(MSR_IA32_PEBS_ENABLE, 0); - } - - i = vmx_find_msr_index(&m->guest, msr); - if (!entry_only) - j = vmx_find_msr_index(&m->host, msr); - - if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || - (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) { - printk_once(KERN_WARNING "Not enough msr switch entries. 
" - "Can't add msr %x\n", msr); - return; - } - if (i < 0) { - i = m->guest.nr++; - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); - } - m->guest.val[i].index = msr; - m->guest.val[i].value = guest_val; - - if (entry_only) - return; - - if (j < 0) { - j = m->host.nr++; - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); - } - m->host.val[j].index = msr; - m->host.val[j].value = host_val; -} - -static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) -{ - u64 guest_efer = vmx->vcpu.arch.efer; - u64 ignore_bits = 0; - - /* Shadow paging assumes NX to be available. */ - if (!enable_ept) - guest_efer |= EFER_NX; - - /* - * LMA and LME handled by hardware; SCE meaningless outside long mode. - */ - ignore_bits |= EFER_SCE; -#ifdef CONFIG_X86_64 - ignore_bits |= EFER_LMA | EFER_LME; - /* SCE is meaningful only in long mode on Intel */ - if (guest_efer & EFER_LMA) - ignore_bits &= ~(u64)EFER_SCE; -#endif - - /* - * On EPT, we can't emulate NX, so we must switch EFER atomically. - * On CPUs that support "load IA32_EFER", always switch EFER - * atomically, since it's faster than switching it manually. - */ - if (cpu_has_load_ia32_efer() || - (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { - if (!(guest_efer & EFER_LMA)) - guest_efer &= ~EFER_LME; - if (guest_efer != host_efer) - add_atomic_switch_msr(vmx, MSR_EFER, - guest_efer, host_efer, false); - else - clear_atomic_switch_msr(vmx, MSR_EFER); - return false; - } else { - clear_atomic_switch_msr(vmx, MSR_EFER); - - guest_efer &= ~ignore_bits; - guest_efer |= host_efer & ignore_bits; - - vmx->guest_msrs[efer_offset].data = guest_efer; - vmx->guest_msrs[efer_offset].mask = ~ignore_bits; - - return true; - } -} - -#ifdef CONFIG_X86_32 -/* - * On 32-bit kernels, VM exits still load the FS and GS bases from the - * VMCS rather than the segment table. KVM uses this helper to figure - * out the current bases to poke them into the VMCS before entry. - */ -static unsigned long segment_base(u16 selector) -{ - struct desc_struct *table; - unsigned long v; - - if (!(selector & ~SEGMENT_RPL_MASK)) - return 0; - - table = get_current_gdt_ro(); - - if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { - u16 ldt_selector = kvm_read_ldt(); - - if (!(ldt_selector & ~SEGMENT_RPL_MASK)) - return 0; - - table = (struct desc_struct *)segment_base(ldt_selector); - } - v = get_desc_base(&table[selector >> 3]); - return v; -} -#endif - -static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) -{ - u32 i; - - wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status); - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); - wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); - for (i = 0; i < addr_range; i++) { - wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); - wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); - } -} - -static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range) -{ - u32 i; - - rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status); - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); - rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); - for (i = 0; i < addr_range; i++) { - rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); - rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); - } -} - -static void pt_guest_enter(struct vcpu_vmx *vmx) -{ - if (pt_mode == PT_MODE_SYSTEM) - return; - - /* - * GUEST_IA32_RTIT_CTL is already set in the VMCS. - * Save host state before VM entry. 
- */ - rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { - wrmsrl(MSR_IA32_RTIT_CTL, 0); - pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); - pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); - } -} - -static void pt_guest_exit(struct vcpu_vmx *vmx) -{ - if (pt_mode == PT_MODE_SYSTEM) - return; - - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { - pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); - pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); - } - - /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */ - wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); -} - -void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base) -{ - if (unlikely(fs_sel != host->fs_sel)) { - if (!(fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, fs_sel); - else - vmcs_write16(HOST_FS_SELECTOR, 0); - host->fs_sel = fs_sel; - } - if (unlikely(gs_sel != host->gs_sel)) { - if (!(gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - else - vmcs_write16(HOST_GS_SELECTOR, 0); - host->gs_sel = gs_sel; - } - if (unlikely(fs_base != host->fs_base)) { - vmcs_writel(HOST_FS_BASE, fs_base); - host->fs_base = fs_base; - } - if (unlikely(gs_base != host->gs_base)) { - vmcs_writel(HOST_GS_BASE, gs_base); - host->gs_base = gs_base; - } -} - -void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs_host_state *host_state; -#ifdef CONFIG_X86_64 - int cpu = raw_smp_processor_id(); -#endif - unsigned long fs_base, gs_base; - u16 fs_sel, gs_sel; - int i; - - vmx->req_immediate_exit = false; - - /* - * Note that guest MSRs to be saved/restored can also be changed - * when guest state is loaded. This happens when guest transitions - * to/from long-mode by setting MSR_EFER.LMA. - */ - if (!vmx->guest_msrs_ready) { - vmx->guest_msrs_ready = true; - for (i = 0; i < vmx->save_nmsrs; ++i) - kvm_set_shared_msr(vmx->guest_msrs[i].index, - vmx->guest_msrs[i].data, - vmx->guest_msrs[i].mask); - - } - if (vmx->guest_state_loaded) - return; - - host_state = &vmx->loaded_vmcs->host_state; - - /* - * Set host fs and gs selectors. Unfortunately, 22.2.3 does not - * allow segment selectors with cpl > 0 or ti == 1. 
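vmx_set_host_fs_gs() above avoids redundant VMWRITEs by caching the last value written for each host field and skipping the write on a match; note how a selector with TI or RPL bits set is forced to 0. A sketch of the same pattern with a stubbed VMWRITE, covering just the FS fields:

#include <stdio.h>

struct host_state { unsigned short fs_sel; unsigned long fs_base; };

static struct host_state cached;

static void vmwrite_stub(const char *field, unsigned long val)
{
	printf("VMWRITE %s = %#lx\n", field, val);
}

static void set_host_fs(struct host_state *host,
			unsigned short fs_sel, unsigned long fs_base)
{
	if (fs_sel != host->fs_sel) {
		/* TI/RPL bits force a selector of 0, as in the original */
		vmwrite_stub("HOST_FS_SELECTOR", (fs_sel & 7) ? 0 : fs_sel);
		host->fs_sel = fs_sel;
	}
	if (fs_base != host->fs_base) {
		vmwrite_stub("HOST_FS_BASE", fs_base);
		host->fs_base = fs_base;
	}
}

int main(void)
{
	set_host_fs(&cached, 0x10, 0x7f0000);	/* two VMWRITEs */
	set_host_fs(&cached, 0x10, 0x7f0000);	/* no VMWRITEs */
	return 0;
}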
- */ - host_state->ldt_sel = kvm_read_ldt(); - -#ifdef CONFIG_X86_64 - savesegment(ds, host_state->ds_sel); - savesegment(es, host_state->es_sel); - - gs_base = cpu_kernelmode_gs_base(cpu); - if (likely(is_64bit_mm(current->mm))) { - save_fsgs_for_kvm(); - fs_sel = current->thread.fsindex; - gs_sel = current->thread.gsindex; - fs_base = current->thread.fsbase; - vmx->msr_host_kernel_gs_base = current->thread.gsbase; - } else { - savesegment(fs, fs_sel); - savesegment(gs, gs_sel); - fs_base = read_msr(MSR_FS_BASE); - vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); - } - - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); -#else - savesegment(fs, fs_sel); - savesegment(gs, gs_sel); - fs_base = segment_base(fs_sel); - gs_base = segment_base(gs_sel); -#endif - - vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); - vmx->guest_state_loaded = true; -} - -static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) -{ - struct vmcs_host_state *host_state; - - if (!vmx->guest_state_loaded) - return; - - host_state = &vmx->loaded_vmcs->host_state; - - ++vmx->vcpu.stat.host_state_reload; - -#ifdef CONFIG_X86_64 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); -#endif - if (host_state->ldt_sel || (host_state->gs_sel & 7)) { - kvm_load_ldt(host_state->ldt_sel); -#ifdef CONFIG_X86_64 - load_gs_index(host_state->gs_sel); -#else - loadsegment(gs, host_state->gs_sel); -#endif - } - if (host_state->fs_sel & 7) - loadsegment(fs, host_state->fs_sel); -#ifdef CONFIG_X86_64 - if (unlikely(host_state->ds_sel | host_state->es_sel)) { - loadsegment(ds, host_state->ds_sel); - loadsegment(es, host_state->es_sel); - } -#endif - invalidate_tss_limit(); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); -#endif - load_fixmap_gdt(raw_smp_processor_id()); - vmx->guest_state_loaded = false; - vmx->guest_msrs_ready = false; -} - -#ifdef CONFIG_X86_64 -static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) -{ - preempt_disable(); - if (vmx->guest_state_loaded) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - preempt_enable(); - return vmx->msr_guest_kernel_gs_base; -} - -static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) -{ - preempt_disable(); - if (vmx->guest_state_loaded) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - vmx->msr_guest_kernel_gs_base = data; -} -#endif - -static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - - /* - * In case of hot-plug or hot-unplug, we may have to undo - * vmx_vcpu_pi_put even if there is no assigned device. And we - * always keep PI.NDST up to date for simplicity: it makes the - * code easier, and CPU migration is not a fast path. - */ - if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) - return; - - /* - * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change - * PI.NDST: pi_post_block is the one expected to change PID.NDST and the - * wakeup handler expects the vCPU to be on the blocked_vcpu_list that - * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up - * correctly. - */ - if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { - pi_clear_sn(pi_desc); - goto after_clear_sn; - } - - /* The full case. 
*/ - do { - old.control = new.control = pi_desc->control; - - dest = cpu_physical_id(cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - new.sn = 0; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - -after_clear_sn: - - /* - * Clear SN before reading the bitmap. The VT-d firmware - * writes the bitmap and reads SN atomically (5.2.3 in the - * spec), so it doesn't really have a memory barrier that - * pairs with this, but we cannot do that and we need one. - */ - smp_mb__after_atomic(); - - if (!pi_is_pir_empty(pi_desc)) - pi_set_on(pi_desc); -} - -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool already_loaded = vmx->loaded_vmcs->cpu == cpu; - - if (!already_loaded) { - loaded_vmcs_clear(vmx->loaded_vmcs); - local_irq_disable(); - crash_disable_local_vmclear(cpu); - - /* - * Read loaded_vmcs->cpu should be before fetching - * loaded_vmcs->loaded_vmcss_on_cpu_link. - * See the comments in __loaded_vmcs_clear(). - */ - smp_rmb(); - - list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, - &per_cpu(loaded_vmcss_on_cpu, cpu)); - crash_enable_local_vmclear(cpu); - local_irq_enable(); - } - - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { - per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; - vmcs_load(vmx->loaded_vmcs->vmcs); - indirect_branch_prediction_barrier(); - } - - if (!already_loaded) { - void *gdt = get_current_gdt_ro(); - unsigned long sysenter_esp; - - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - - /* - * Linux uses per-cpu TSS and GDT, so set these when switching - * processors. See 22.2.4. - */ - vmcs_writel(HOST_TR_BASE, - (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); - vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ - - rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); - vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ - - vmx->loaded_vmcs->cpu = cpu; - } - - /* Setup TSC multiplier */ - if (kvm_has_tsc_control && - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) - decache_tsc_multiplier(vmx); -} - -/* - * Switches to specified vcpu, until a matching vcpu_put(), but assumes - * vcpu mutex is already taken. 
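The do/while loop in vmx_vcpu_pi_load() above is the classic lock-free read-modify-write: snapshot the control word, build the new value with the updated destination and SN cleared, and retry the compare-and-swap until no concurrent writer intervened. A C11 rendering of the shape of that loop; the bit layout here is illustrative, not the real posted-interrupt descriptor:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct pi_control { _Atomic uint64_t control; };

#define PI_SN		(1ULL << 0)	/* illustrative bit positions */
#define PI_NDST_SHIFT	32

static void pi_update_ndst(struct pi_control *pi, uint32_t dest)
{
	uint64_t old, new;

	old = atomic_load(&pi->control);
	do {
		new = old & ~(PI_SN | (0xffffffffULL << PI_NDST_SHIFT));
		new |= (uint64_t)dest << PI_NDST_SHIFT;	/* new.ndst = dest */
		/* new.sn = 0: notifications may be sent again */
	} while (!atomic_compare_exchange_weak(&pi->control, &old, new));
}

int main(void)
{
	struct pi_control pi = { .control = PI_SN };

	pi_update_ndst(&pi, 5);
	printf("control=%#llx\n",
	       (unsigned long long)atomic_load(&pi.control));
	return 0;
}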
- */ -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - vmx_vcpu_load_vmcs(vcpu, cpu); - - vmx_vcpu_pi_load(vcpu, cpu); - - vmx->host_pkru = read_pkru(); - vmx->host_debugctlmsr = get_debugctlmsr(); -} - -static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) - return; - - /* Set SN when the vCPU is preempted */ - if (vcpu->preempted) - pi_set_sn(pi_desc); -} - -static void vmx_vcpu_put(struct kvm_vcpu *vcpu) -{ - vmx_vcpu_pi_put(vcpu); - - vmx_prepare_switch_to_host(to_vmx(vcpu)); -} - -static bool emulation_required(struct kvm_vcpu *vcpu) -{ - return emulate_invalid_guest_state && !guest_state_valid(vcpu); -} - -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); - -unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long rflags, save_rflags; - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); - rflags = vmcs_readl(GUEST_RFLAGS); - if (vmx->rmode.vm86_active) { - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = vmx->rmode.save_rflags; - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; - } - vmx->rflags = rflags; - } - return vmx->rflags; -} - -void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long old_rflags; - - if (enable_unrestricted_guest) { - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); - vmx->rflags = rflags; - vmcs_writel(GUEST_RFLAGS, rflags); - return; - } - - old_rflags = vmx_get_rflags(vcpu); - vmx->rflags = rflags; - if (vmx->rmode.vm86_active) { - vmx->rmode.save_rflags = rflags; - rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; - } - vmcs_writel(GUEST_RFLAGS, rflags); - - if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) - vmx->emulation_required = emulation_required(vcpu); -} - -u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) -{ - u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - int ret = 0; - - if (interruptibility & GUEST_INTR_STATE_STI) - ret |= KVM_X86_SHADOW_INT_STI; - if (interruptibility & GUEST_INTR_STATE_MOV_SS) - ret |= KVM_X86_SHADOW_INT_MOV_SS; - - return ret; -} - -void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) -{ - u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - u32 interruptibility = interruptibility_old; - - interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); - - if (mask & KVM_X86_SHADOW_INT_MOV_SS) - interruptibility |= GUEST_INTR_STATE_MOV_SS; - else if (mask & KVM_X86_SHADOW_INT_STI) - interruptibility |= GUEST_INTR_STATE_STI; - - if ((interruptibility != interruptibility_old)) - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); -} - -static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long value; - - /* - * Any MSR write that attempts to change bits marked reserved will - * case a #GP fault. - */ - if (data & vmx->pt_desc.ctl_bitmask) - return 1; - - /* - * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will - * result in a #GP unless the same write also clears TraceEn. 
- */ - if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) && - ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN)) - return 1; - - /* - * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit - * and FabricEn would cause #GP, if - * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0 - */ - if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) && - !(data & RTIT_CTL_FABRIC_EN) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output)) - return 1; - - /* - * MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that - * utilize encodings marked reserved will casue a #GP fault. - */ - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) && - !test_bit((data & RTIT_CTL_MTC_RANGE) >> - RTIT_CTL_MTC_RANGE_OFFSET, &value)) - return 1; - value = intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cycle_thresholds); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && - !test_bit((data & RTIT_CTL_CYC_THRESH) >> - RTIT_CTL_CYC_THRESH_OFFSET, &value)) - return 1; - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods); - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && - !test_bit((data & RTIT_CTL_PSB_FREQ) >> - RTIT_CTL_PSB_FREQ_OFFSET, &value)) - return 1; - - /* - * If ADDRx_CFG is reserved or the encodings is >2 will - * cause a #GP fault. - */ - value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2)) - return 1; - value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET; - if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2)) - return 1; - - return 0; -} - -static int skip_emulated_instruction(struct kvm_vcpu *vcpu) -{ - unsigned long rip; - - /* - * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on - * undefined behavior: Intel's SDM doesn't mandate the VMCS field be - * set when EPT misconfig occurs. In practice, real hardware updates - * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors - * (namely Hyper-V) don't set it due to it being undefined behavior, - * i.e. we end up advancing IP with some random value. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || - to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); - } else { - if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) - return 0; - } - - /* skipping an emulated instruction also counts */ - vmx_set_interrupt_shadow(vcpu, 0); - - return 1; -} - -static void vmx_clear_hlt(struct kvm_vcpu *vcpu) -{ - /* - * Ensure that we clear the HLT state in the VMCS. We don't need to - * explicitly skip the instruction because if the HLT state is set, - * then the instruction is already executing and RIP has already been - * advanced. 
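The first two rules of vmx_rtit_ctl_check() above are a common MSR-validation pattern: reject any write that touches reserved bits, and, while TraceEn is set, reject any write that changes more than TraceEn itself. A minimal model with an illustrative reserved-bit mask:

#include <stdint.h>
#include <stdio.h>

#define RTIT_CTL_TRACEEN	(1ULL << 0)

static uint64_t ctl_bitmask = ~0x3ULL;	/* illustrative: bits 0-1 writable */
static uint64_t guest_ctl;

static int rtit_ctl_check(uint64_t data)
{
	if (data & ctl_bitmask)
		return 1;	/* reserved bit set: #GP */
	if ((guest_ctl & RTIT_CTL_TRACEEN) &&
	    ((guest_ctl ^ data) & ~RTIT_CTL_TRACEEN))
		return 1;	/* changing other bits while enabled: #GP */
	return 0;
}

int main(void)
{
	guest_ctl = RTIT_CTL_TRACEEN;
	printf("%d %d\n",
	       rtit_ctl_check(RTIT_CTL_TRACEEN),		/* accepted */
	       rtit_ctl_check(RTIT_CTL_TRACEEN | 0x2));	/* rejected */
	return 0;
}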
- */ - if (kvm_hlt_in_guest(vcpu->kvm) && - vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); -} - -static void vmx_queue_exception(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned nr = vcpu->arch.exception.nr; - bool has_error_code = vcpu->arch.exception.has_error_code; - u32 error_code = vcpu->arch.exception.error_code; - u32 intr_info = nr | INTR_INFO_VALID_MASK; - - kvm_deliver_exception_payload(vcpu); - - if (has_error_code) { - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); - intr_info |= INTR_INFO_DELIVER_CODE_MASK; - } - - if (vmx->rmode.vm86_active) { - int inc_eip = 0; - if (kvm_exception_is_soft(nr)) - inc_eip = vcpu->arch.event_exit_inst_len; - kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); - return; - } - - WARN_ON_ONCE(vmx->emulation_required); - - if (kvm_exception_is_soft(nr)) { - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); - intr_info |= INTR_TYPE_SOFT_EXCEPTION; - } else - intr_info |= INTR_TYPE_HARD_EXCEPTION; - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); - - vmx_clear_hlt(vcpu); -} - -static bool vmx_rdtscp_supported(void) -{ - return cpu_has_vmx_rdtscp(); -} - -static bool vmx_invpcid_supported(void) -{ - return cpu_has_vmx_invpcid(); -} - -/* - * Swap MSR entry in host/guest MSR entry array. - */ -static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) -{ - struct shared_msr_entry tmp; - - tmp = vmx->guest_msrs[to]; - vmx->guest_msrs[to] = vmx->guest_msrs[from]; - vmx->guest_msrs[from] = tmp; -} - -/* - * Set up the vmcs to automatically save and restore system - * msrs. Don't touch the 64-bit msrs if the guest is in legacy - * mode, as fiddling with msrs is very expensive. - */ -static void setup_msrs(struct vcpu_vmx *vmx) -{ - int save_nmsrs, index; - - save_nmsrs = 0; -#ifdef CONFIG_X86_64 - /* - * The SYSCALL MSRs are only needed on long mode guests, and only - * when EFER.SCE is set. - */ - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { - index = __find_msr_index(vmx, MSR_STAR); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_LSTAR); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_SYSCALL_MASK); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - } -#endif - index = __find_msr_index(vmx, MSR_EFER); - if (index >= 0 && update_transition_efer(vmx, index)) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - - vmx->save_nmsrs = save_nmsrs; - vmx->guest_msrs_ready = false; - - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); -} - -static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - return vcpu->arch.tsc_offset - vmcs12->tsc_offset; - - return vcpu->arch.tsc_offset; -} - -static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u64 g_tsc_offset = 0; - - /* - * We're here if L1 chose not to trap WRMSR to TSC. 
According - * to the spec, this should set L1's TSC; The offset that L1 - * set for L2 remains unchanged, and still needs to be added - * to the newly set TSC to get L2's TSC. - */ - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - g_tsc_offset = vmcs12->tsc_offset; - - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - vcpu->arch.tsc_offset - g_tsc_offset, - offset); - vmcs_write64(TSC_OFFSET, offset + g_tsc_offset); - return offset + g_tsc_offset; -} - -/* - * nested_vmx_allowed() checks whether a guest should be allowed to use VMX - * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for - * all guests if the "nested" module option is off, and can also be disabled - * for a single guest by disabling its VMX cpuid bit. - */ -bool nested_vmx_allowed(struct kvm_vcpu *vcpu) -{ - return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); -} - -static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, - uint64_t val) -{ - uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; - - return !(val & ~valid_bits); -} - -static int vmx_get_msr_feature(struct kvm_msr_entry *msr) -{ - switch (msr->index) { - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - if (!nested) - return 1; - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); - default: - return 1; - } -} - -/* - * Reads an msr value (of 'msr_index') into 'pdata'. - * Returns 0 on success, non-0 otherwise. - * Assumes vcpu_load() was already called. - */ -static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct shared_msr_entry *msr; - u32 index; - - switch (msr_info->index) { -#ifdef CONFIG_X86_64 - case MSR_FS_BASE: - msr_info->data = vmcs_readl(GUEST_FS_BASE); - break; - case MSR_GS_BASE: - msr_info->data = vmcs_readl(GUEST_GS_BASE); - break; - case MSR_KERNEL_GS_BASE: - msr_info->data = vmx_read_guest_kernel_gs_base(vmx); - break; -#endif - case MSR_EFER: - return kvm_get_msr_common(vcpu, msr_info); - case MSR_IA32_TSX_CTRL: - if (!msr_info->host_initiated && - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) - return 1; - goto find_shared_msr; - case MSR_IA32_UMWAIT_CONTROL: - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) - return 1; - - msr_info->data = vmx->msr_ia32_umwait_control; - break; - case MSR_IA32_SPEC_CTRL: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) - return 1; - - msr_info->data = to_vmx(vcpu)->spec_ctrl; - break; - case MSR_IA32_SYSENTER_CS: - msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); - break; - case MSR_IA32_SYSENTER_EIP: - msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); - break; - case MSR_IA32_SYSENTER_ESP: - msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); - break; - case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported() || - (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) - return 1; - msr_info->data = vmcs_read64(GUEST_BNDCFGS); - break; - case MSR_IA32_MCG_EXT_CTL: - if (!msr_info->host_initiated && - !(vmx->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) - return 1; - msr_info->data = vcpu->arch.mcg_ext_ctl; - break; - case MSR_IA32_FEATURE_CONTROL: - msr_info->data = vmx->msr_ia32_feature_control; - break; - case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: - if (!nested_vmx_allowed(vcpu)) - return 1; - return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, - &msr_info->data); - case MSR_IA32_RTIT_CTL: - if (pt_mode != PT_MODE_HOST_GUEST) - return 1; - msr_info->data = vmx->pt_desc.guest.ctl; - break; - case MSR_IA32_RTIT_STATUS: - if (pt_mode != PT_MODE_HOST_GUEST) - return 1; - msr_info->data = vmx->pt_desc.guest.status; - break; - case MSR_IA32_RTIT_CR3_MATCH: - if ((pt_mode != PT_MODE_HOST_GUEST) || - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cr3_filtering)) - return 1; - msr_info->data = vmx->pt_desc.guest.cr3_match; - break; - case MSR_IA32_RTIT_OUTPUT_BASE: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - msr_info->data = vmx->pt_desc.guest.output_base; - break; - case MSR_IA32_RTIT_OUTPUT_MASK: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) - return 1; - msr_info->data = vmx->pt_desc.guest.output_mask; - break; - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; - if ((pt_mode != PT_MODE_HOST_GUEST) || - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges))) - return 1; - if (is_noncanonical_address(data, vcpu)) - return 1; - if (index % 2) - msr_info->data = vmx->pt_desc.guest.addr_b[index / 2]; - else - msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; - break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - goto find_shared_msr; - default: - find_shared_msr: - msr = find_msr_entry(vmx, msr_info->index); - if (msr) { - msr_info->data = msr->data; - break; - } - return kvm_get_msr_common(vcpu, msr_info); - } - - return 0; -} - -/* - * Writes msr value into the appropriate "register". - * Returns 0 on success, non-0 otherwise. - * Assumes vcpu_load() was already called. 
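The MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B arm above relies on the address-range MSRs being numbered consecutively, A then B for each successive range, so index % 2 picks the half and index / 2 the range. A worked decode; 0x580 is the architectural base of these MSRs:

#include <stdio.h>

#define RTIT_ADDR0_A_BASE 0x580

static void decode(unsigned int msr)
{
	unsigned int index = msr - RTIT_ADDR0_A_BASE;

	printf("msr %#x -> addr_%c[%u]\n", msr,
	       (index % 2) ? 'b' : 'a', index / 2);
}

int main(void)
{
	decode(0x580);	/* addr_a[0] */
	decode(0x583);	/* addr_b[1] */
	return 0;
}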
-
-/*
- * Writes msr value into the appropriate "register".
- * Returns 0 on success, non-0 otherwise.
- * Assumes vcpu_load() was already called.
- */
-static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct shared_msr_entry *msr;
-	int ret = 0;
-	u32 msr_index = msr_info->index;
-	u64 data = msr_info->data;
-	u32 index;
-
-	switch (msr_index) {
-	case MSR_EFER:
-		ret = kvm_set_msr_common(vcpu, msr_info);
-		break;
-#ifdef CONFIG_X86_64
-	case MSR_FS_BASE:
-		vmx_segment_cache_clear(vmx);
-		vmcs_writel(GUEST_FS_BASE, data);
-		break;
-	case MSR_GS_BASE:
-		vmx_segment_cache_clear(vmx);
-		vmcs_writel(GUEST_GS_BASE, data);
-		break;
-	case MSR_KERNEL_GS_BASE:
-		vmx_write_guest_kernel_gs_base(vmx, data);
-		break;
-#endif
-	case MSR_IA32_SYSENTER_CS:
-		if (is_guest_mode(vcpu))
-			get_vmcs12(vcpu)->guest_sysenter_cs = data;
-		vmcs_write32(GUEST_SYSENTER_CS, data);
-		break;
-	case MSR_IA32_SYSENTER_EIP:
-		if (is_guest_mode(vcpu))
-			get_vmcs12(vcpu)->guest_sysenter_eip = data;
-		vmcs_writel(GUEST_SYSENTER_EIP, data);
-		break;
-	case MSR_IA32_SYSENTER_ESP:
-		if (is_guest_mode(vcpu))
-			get_vmcs12(vcpu)->guest_sysenter_esp = data;
-		vmcs_writel(GUEST_SYSENTER_ESP, data);
-		break;
-	case MSR_IA32_DEBUGCTLMSR:
-		if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
-						VM_EXIT_SAVE_DEBUG_CONTROLS)
-			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
-
-		ret = kvm_set_msr_common(vcpu, msr_info);
-		break;
-
-	case MSR_IA32_BNDCFGS:
-		if (!kvm_mpx_supported() ||
-		    (!msr_info->host_initiated &&
-		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
-			return 1;
-		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
-		    (data & MSR_IA32_BNDCFGS_RSVD))
-			return 1;
-		vmcs_write64(GUEST_BNDCFGS, data);
-		break;
-	case MSR_IA32_UMWAIT_CONTROL:
-		if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
-			return 1;
-
-		/* The reserved bit 1 and non-32 bit [63:32] should be zero */
-		if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
-			return 1;
-
-		vmx->msr_ia32_umwait_control = data;
-		break;
-	case MSR_IA32_SPEC_CTRL:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
-			return 1;
-
-		/* The STIBP bit doesn't fault even if it's not advertised */
-		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
-			return 1;
-
-		vmx->spec_ctrl = data;
-
-		if (!data)
-			break;
-
-		/*
-		 * For non-nested:
-		 * When it's written (to non-zero) for the first time, pass
-		 * it through.
-		 *
-		 * For nested:
-		 * The handling of the MSR bitmap for L2 guests is done in
-		 * nested_vmx_prepare_msr_bitmap. We should not touch the
-		 * vmcs02.msr_bitmap here since it gets completely overwritten
-		 * in the merging. We update the vmcs01 here for L1 as well
-		 * since it will end up touching the MSR anyway now.
-		 */
-		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
-					      MSR_IA32_SPEC_CTRL,
-					      MSR_TYPE_RW);
-		break;
-	case MSR_IA32_TSX_CTRL:
-		if (!msr_info->host_initiated &&
-		    !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
-			return 1;
-		if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
-			return 1;
-		goto find_shared_msr;
-	case MSR_IA32_PRED_CMD:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
-			return 1;
-
-		if (data & ~PRED_CMD_IBPB)
-			return 1;
-
-		if (!data)
-			break;
-
-		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-
-		/*
-		 * For non-nested:
-		 * When it's written (to non-zero) for the first time, pass
-		 * it through.
-		 *
-		 * For nested:
-		 * The handling of the MSR bitmap for L2 guests is done in
-		 * nested_vmx_prepare_msr_bitmap. We should not touch the
-		 * vmcs02.msr_bitmap here since it gets completely overwritten
-		 * in the merging.
-		 */
-		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
-					      MSR_TYPE_W);
-		break;
-	case MSR_IA32_CR_PAT:
-		if (!kvm_pat_valid(data))
-			return 1;
-
-		if (is_guest_mode(vcpu) &&
-		    get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
-			get_vmcs12(vcpu)->guest_ia32_pat = data;
-
-		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
-			vmcs_write64(GUEST_IA32_PAT, data);
-			vcpu->arch.pat = data;
-			break;
-		}
-		ret = kvm_set_msr_common(vcpu, msr_info);
-		break;
-	case MSR_IA32_TSC_ADJUST:
-		ret = kvm_set_msr_common(vcpu, msr_info);
-		break;
-	case MSR_IA32_MCG_EXT_CTL:
-		if ((!msr_info->host_initiated &&
-		     !(to_vmx(vcpu)->msr_ia32_feature_control &
-		       FEATURE_CONTROL_LMCE)) ||
-		    (data & ~MCG_EXT_CTL_LMCE_EN))
-			return 1;
-		vcpu->arch.mcg_ext_ctl = data;
-		break;
-	case MSR_IA32_FEATURE_CONTROL:
-		if (!vmx_feature_control_msr_valid(vcpu, data) ||
-		    (to_vmx(vcpu)->msr_ia32_feature_control &
-		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
-			return 1;
-		vmx->msr_ia32_feature_control = data;
-		if (msr_info->host_initiated && data == 0)
-			vmx_leave_nested(vcpu);
-		break;
-	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
-		if (!msr_info->host_initiated)
-			return 1; /* they are read-only */
-		if (!nested_vmx_allowed(vcpu))
-			return 1;
-		return vmx_set_vmx_msr(vcpu, msr_index, data);
-	case MSR_IA32_RTIT_CTL:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-		    vmx_rtit_ctl_check(vcpu, data) ||
-		    vmx->nested.vmxon)
-			return 1;
-		vmcs_write64(GUEST_IA32_RTIT_CTL, data);
-		vmx->pt_desc.guest.ctl = data;
-		pt_update_intercept_for_msr(vmx);
-		break;
-	case MSR_IA32_RTIT_STATUS:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-		    (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-		    (data & MSR_IA32_RTIT_STATUS_MASK))
-			return 1;
-		vmx->pt_desc.guest.status = data;
-		break;
-	case MSR_IA32_RTIT_CR3_MATCH:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-		    (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-		    !intel_pt_validate_cap(vmx->pt_desc.caps,
-					   PT_CAP_cr3_filtering))
-			return 1;
-		vmx->pt_desc.guest.cr3_match = data;
-		break;
-	case MSR_IA32_RTIT_OUTPUT_BASE:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-		    (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-		    (!intel_pt_validate_cap(vmx->pt_desc.caps,
-					    PT_CAP_topa_output) &&
-		     !intel_pt_validate_cap(vmx->pt_desc.caps,
-					    PT_CAP_single_range_output)) ||
-		    (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
-			return 1;
-		vmx->pt_desc.guest.output_base = data;
-		break;
-	case MSR_IA32_RTIT_OUTPUT_MASK:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-		    (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-		    (!intel_pt_validate_cap(vmx->pt_desc.caps,
-					    PT_CAP_topa_output) &&
-		     !intel_pt_validate_cap(vmx->pt_desc.caps,
-					    PT_CAP_single_range_output)))
-			return 1;
-		vmx->pt_desc.guest.output_mask = data;
-		break;
-	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
-		index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-		    (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-		    (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
-							PT_CAP_num_address_ranges)))
-			return 1;
-		if (is_noncanonical_address(data, vcpu))
-			return 1;
-		if (index % 2)
-			vmx->pt_desc.guest.addr_b[index / 2] = data;
-		else
-			vmx->pt_desc.guest.addr_a[index / 2] = data;
-		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-			return 1;
-		/* Check reserved bit, higher 32 bits should be zero */
-		if ((data >> 32) != 0)
-			return 1;
-		goto find_shared_msr;
-
-	default:
-	find_shared_msr:
-		msr = find_msr_entry(vmx, msr_index);
-		if (msr)
-			ret = vmx_set_guest_msr(vmx, msr, data);
-		else
-			ret = kvm_set_msr_common(vcpu, msr_info);
-	}
-
-	return ret;
-}
-
-static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
-{
-	kvm_register_mark_available(vcpu, reg);
-
-	switch (reg) {
-	case VCPU_REGS_RSP:
-		vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
-		break;
-	case VCPU_REGS_RIP:
-		vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
-		break;
-	case VCPU_EXREG_PDPTR:
-		if (enable_ept)
-			ept_save_pdptrs(vcpu);
-		break;
-	case VCPU_EXREG_CR3:
-		if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
-			vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-		break;
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
-}
-
-static __init int cpu_has_kvm_support(void)
-{
-	return cpu_has_vmx();
-}
-
-static __init int vmx_disabled_by_bios(void)
-{
-	u64 msr;
-
-	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
-	if (msr & FEATURE_CONTROL_LOCKED) {
-		/* launched w/ TXT and VMX disabled */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
-			&& tboot_enabled())
-			return 1;
-		/* launched w/o TXT and VMX only enabled w/ TXT */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-			&& (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
-			&& !tboot_enabled()) {
-			printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
-				"activate TXT before enabling KVM\n");
-			return 1;
-		}
-		/* launched w/o TXT and VMX disabled */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-			&& !tboot_enabled())
-			return 1;
-	}
-
-	return 0;
-}
-
-static void kvm_cpu_vmxon(u64 addr)
-{
-	cr4_set_bits(X86_CR4_VMXE);
-	intel_pt_handle_vmx(1);
-
-	asm volatile ("vmxon %0" : : "m"(addr));
-}
-
-static int hardware_enable(void)
-{
-	int cpu = raw_smp_processor_id();
-	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-	u64 old, test_bits;
-
-	if (cr4_read_shadow() & X86_CR4_VMXE)
-		return -EBUSY;
-
-	/*
-	 * This can happen if we hot-added a CPU but failed to allocate
-	 * VP assist page for it.
-	 */
-	if (static_branch_unlikely(&enable_evmcs) &&
-	    !hv_get_vp_assist_page(cpu))
-		return -EFAULT;
-
-	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-	INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
-	spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
-
-	/*
-	 * Now we can enable the vmclear operation in kdump
-	 * since the loaded_vmcss_on_cpu list on this cpu
-	 * has been initialized.
-	 *
-	 * Though the cpu is not in VMX operation now, there
-	 * is no problem to enable the vmclear operation since
-	 * the loaded_vmcss_on_cpu list is empty!
-	 */
-	crash_enable_local_vmclear(cpu);
-
-	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
-
-	test_bits = FEATURE_CONTROL_LOCKED;
-	test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
-	if (tboot_enabled())
-		test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
-
-	if ((old & test_bits) != test_bits) {
-		/* enable and lock */
-		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
-	}
-	kvm_cpu_vmxon(phys_addr);
-	if (enable_ept)
-		ept_sync_global();
-
-	return 0;
-}
-
-static void vmclear_local_loaded_vmcss(void)
-{
-	int cpu = raw_smp_processor_id();
-	struct loaded_vmcs *v, *n;
-
-	list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
-				 loaded_vmcss_on_cpu_link)
-		__loaded_vmcs_clear(v);
-}
-
-
-/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
- * tricks.
- */
-static void kvm_cpu_vmxoff(void)
-{
-	asm volatile (__ex("vmxoff"));
-
-	intel_pt_handle_vmx(0);
-	cr4_clear_bits(X86_CR4_VMXE);
-}
-
-static void hardware_disable(void)
-{
-	vmclear_local_loaded_vmcss();
-	kvm_cpu_vmxoff();
-}
-
-static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
-				      u32 msr, u32 *result)
-{
-	u32 vmx_msr_low, vmx_msr_high;
-	u32 ctl = ctl_min | ctl_opt;
-
-	rdmsr(msr, vmx_msr_low, vmx_msr_high);
-
-	ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
-	ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */
-
-	/* Ensure minimum (required) set of control bits are supported. */
-	if (ctl_min & ~ctl)
-		return -EIO;
-
-	*result = ctl;
-	return 0;
-}
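adjust_vmx_controls() encodes the SDM's allowed-0/allowed-1 convention: in each VMX capability MSR the low 32 bits report control bits that must be 1 and the high 32 bits report bits that may be 1. The same computation as a standalone sketch (the capability value below is made up):

	#include <stdint.h>
	#include <stdio.h>

	/* low word: must-be-one bits; high word: may-be-one bits */
	static int adjust(uint32_t min, uint32_t opt, uint64_t cap, uint32_t *out)
	{
		uint32_t lo = (uint32_t)cap, hi = (uint32_t)(cap >> 32);
		uint32_t ctl = (min | opt) & hi;	/* drop bits that may not be 1 */

		ctl |= lo;				/* force bits that must be 1 */
		if (min & ~ctl)
			return -1;			/* a required bit is unsupported */
		*out = ctl;
		return 0;
	}

	int main(void)
	{
		uint32_t ctl;

		/* hypothetical MSR: bits 1 and 3 required, bits 1/3/5 allowed */
		if (!adjust(0x8, 0x20, 0x0000002a0000000aULL, &ctl))
			printf("ctl=%#x\n", ctl);	/* ctl=0x2a */
		return 0;
	}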
-
-static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
-				    struct vmx_capability *vmx_cap)
-{
-	u32 vmx_msr_low, vmx_msr_high;
-	u32 min, opt, min2, opt2;
-	u32 _pin_based_exec_control = 0;
-	u32 _cpu_based_exec_control = 0;
-	u32 _cpu_based_2nd_exec_control = 0;
-	u32 _vmexit_control = 0;
-	u32 _vmentry_control = 0;
-
-	memset(vmcs_conf, 0, sizeof(*vmcs_conf));
-	min = CPU_BASED_HLT_EXITING |
-#ifdef CONFIG_X86_64
-	      CPU_BASED_CR8_LOAD_EXITING |
-	      CPU_BASED_CR8_STORE_EXITING |
-#endif
-	      CPU_BASED_CR3_LOAD_EXITING |
-	      CPU_BASED_CR3_STORE_EXITING |
-	      CPU_BASED_UNCOND_IO_EXITING |
-	      CPU_BASED_MOV_DR_EXITING |
-	      CPU_BASED_USE_TSC_OFFSETTING |
-	      CPU_BASED_MWAIT_EXITING |
-	      CPU_BASED_MONITOR_EXITING |
-	      CPU_BASED_INVLPG_EXITING |
-	      CPU_BASED_RDPMC_EXITING;
-
-	opt = CPU_BASED_TPR_SHADOW |
-	      CPU_BASED_USE_MSR_BITMAPS |
-	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
-	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
-				&_cpu_based_exec_control) < 0)
-		return -EIO;
-#ifdef CONFIG_X86_64
-	if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
-		_cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
-					   ~CPU_BASED_CR8_STORE_EXITING;
-#endif
-	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
-		min2 = 0;
-		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
-			SECONDARY_EXEC_WBINVD_EXITING |
-			SECONDARY_EXEC_ENABLE_VPID |
-			SECONDARY_EXEC_ENABLE_EPT |
-			SECONDARY_EXEC_UNRESTRICTED_GUEST |
-			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
-			SECONDARY_EXEC_DESC |
-			SECONDARY_EXEC_RDTSCP |
-			SECONDARY_EXEC_ENABLE_INVPCID |
-			SECONDARY_EXEC_APIC_REGISTER_VIRT |
-			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-			SECONDARY_EXEC_SHADOW_VMCS |
-			SECONDARY_EXEC_XSAVES |
-			SECONDARY_EXEC_RDSEED_EXITING |
-			SECONDARY_EXEC_RDRAND_EXITING |
-			SECONDARY_EXEC_ENABLE_PML |
-			SECONDARY_EXEC_TSC_SCALING |
-			SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
-			SECONDARY_EXEC_PT_USE_GPA |
-			SECONDARY_EXEC_PT_CONCEAL_VMX |
-			SECONDARY_EXEC_ENABLE_VMFUNC |
-			SECONDARY_EXEC_ENCLS_EXITING;
-		if (adjust_vmx_controls(min2, opt2,
-					MSR_IA32_VMX_PROCBASED_CTLS2,
-					&_cpu_based_2nd_exec_control) < 0)
-			return -EIO;
-	}
-#ifndef CONFIG_X86_64
-	if (!(_cpu_based_2nd_exec_control &
-				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
-		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
-#endif
-
-	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
-		_cpu_based_2nd_exec_control &= ~(
-				SECONDARY_EXEC_APIC_REGISTER_VIRT |
-				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
-				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
-
-	rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
-		&vmx_cap->ept, &vmx_cap->vpid);
-
-	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
-		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
-		   enabled */
-		_cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
-					     CPU_BASED_CR3_STORE_EXITING |
-					     CPU_BASED_INVLPG_EXITING);
-	} else if (vmx_cap->ept) {
-		vmx_cap->ept = 0;
-		pr_warn_once("EPT CAP should not exist if not support "
-				"1-setting enable EPT VM-execution control\n");
-	}
-	if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
-	    vmx_cap->vpid) {
-		vmx_cap->vpid = 0;
-		pr_warn_once("VPID CAP should not exist if not support "
-				"1-setting enable VPID VM-execution control\n");
-	}
-
-	min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
-#ifdef CONFIG_X86_64
-	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
-#endif
-	opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
-	      VM_EXIT_LOAD_IA32_PAT |
-	      VM_EXIT_LOAD_IA32_EFER |
-	      VM_EXIT_CLEAR_BNDCFGS |
-	      VM_EXIT_PT_CONCEAL_PIP |
-	      VM_EXIT_CLEAR_IA32_RTIT_CTL;
-	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
-				&_vmexit_control) < 0)
-		return -EIO;
-
-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
-	      PIN_BASED_VMX_PREEMPTION_TIMER;
-	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-				&_pin_based_exec_control) < 0)
-		return -EIO;
-
-	if (cpu_has_broken_vmx_preemption_timer())
-		_pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-	if (!(_cpu_based_2nd_exec_control &
-		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
-		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
-
-	min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
-	opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
-	      VM_ENTRY_LOAD_IA32_PAT |
-	      VM_ENTRY_LOAD_IA32_EFER |
-	      VM_ENTRY_LOAD_BNDCFGS |
-	      VM_ENTRY_PT_CONCEAL_PIP |
-	      VM_ENTRY_LOAD_IA32_RTIT_CTL;
-	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
-				&_vmentry_control) < 0)
-		return -EIO;
-
-	/*
-	 * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
-	 * can't be used due to an errata where VM Exit may incorrectly clear
-	 * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the
-	 * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
-	 */
-	if (boot_cpu_data.x86 == 0x6) {
-		switch (boot_cpu_data.x86_model) {
-		case 26: /* AAK155 */
-		case 30: /* AAP115 */
-		case 37: /* AAT100 */
-		case 44: /* BC86,AAY89,BD102 */
-		case 46: /* BA97 */
-			_vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
-			_vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
-			pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
-					"does not work properly. Using workaround\n");
-			break;
-		default:
-			break;
-		}
-	}
-
-
-	rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
-
-	/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
-	if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
-		return -EIO;
-
-#ifdef CONFIG_X86_64
-	/* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
-	if (vmx_msr_high & (1u<<16))
-		return -EIO;
-#endif
-
-	/* Require Write-Back (WB) memory type for VMCS accesses. */
-	if (((vmx_msr_high >> 18) & 15) != 6)
-		return -EIO;
-
-	vmcs_conf->size = vmx_msr_high & 0x1fff;
-	vmcs_conf->order = get_order(vmcs_conf->size);
-	vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
-
-	vmcs_conf->revision_id = vmx_msr_low;
-
-	vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
-	vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
-	vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
-	vmcs_conf->vmexit_ctrl = _vmexit_control;
-	vmcs_conf->vmentry_ctrl = _vmentry_control;
-
-	if (static_branch_unlikely(&enable_evmcs))
-		evmcs_sanitize_exec_ctrls(vmcs_conf);
-
-	return 0;
-}
-
-struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
-{
-	int node = cpu_to_node(cpu);
-	struct page *pages;
-	struct vmcs *vmcs;
-
-	pages = __alloc_pages_node(node, flags, vmcs_config.order);
-	if (!pages)
-		return NULL;
-	vmcs = page_address(pages);
-	memset(vmcs, 0, vmcs_config.size);
-
-	/* KVM supports Enlightened VMCS v1 only */
-	if (static_branch_unlikely(&enable_evmcs))
-		vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
-	else
-		vmcs->hdr.revision_id = vmcs_config.revision_id;
-
-	if (shadow)
-		vmcs->hdr.shadow_vmcs = 1;
-	return vmcs;
-}
-
-void free_vmcs(struct vmcs *vmcs)
-{
-	free_pages((unsigned long)vmcs, vmcs_config.order);
-}
-
-/*
- * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
- */
-void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
-{
-	if (!loaded_vmcs->vmcs)
-		return;
-	loaded_vmcs_clear(loaded_vmcs);
-	free_vmcs(loaded_vmcs->vmcs);
-	loaded_vmcs->vmcs = NULL;
-	if (loaded_vmcs->msr_bitmap)
-		free_page((unsigned long)loaded_vmcs->msr_bitmap);
-	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
-}
-
-int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
-{
-	loaded_vmcs->vmcs = alloc_vmcs(false);
-	if (!loaded_vmcs->vmcs)
-		return -ENOMEM;
-
-	loaded_vmcs->shadow_vmcs = NULL;
-	loaded_vmcs->hv_timer_soft_disabled = false;
-	loaded_vmcs_init(loaded_vmcs);
-
-	if (cpu_has_vmx_msr_bitmap()) {
-		loaded_vmcs->msr_bitmap = (unsigned long *)
-				__get_free_page(GFP_KERNEL_ACCOUNT);
-		if (!loaded_vmcs->msr_bitmap)
-			goto out_vmcs;
-		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
-
-		if (IS_ENABLED(CONFIG_HYPERV) &&
-		    static_branch_unlikely(&enable_evmcs) &&
-		    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
-			struct hv_enlightened_vmcs *evmcs =
-				(struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
-
-			evmcs->hv_enlightenments_control.msr_bitmap = 1;
-		}
-	}
-
-	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
-	memset(&loaded_vmcs->controls_shadow, 0,
-	       sizeof(struct vmcs_controls_shadow));
-
-	return 0;
-
-out_vmcs:
-	free_loaded_vmcs(loaded_vmcs);
-	return -ENOMEM;
-}
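alloc_loaded_vmcs() and free_loaded_vmcs() are strictly paired, and the allocation error path funnels through the free routine so a half-constructed loaded_vmcs tears down safely. That construct-or-unwind shape, reduced to a standalone sketch:

	#include <stdlib.h>

	struct res {
		void *a, *b;
	};

	static void res_free(struct res *r)
	{
		free(r->a);		/* free(NULL) is a no-op, so a	 */
		free(r->b);		/* partial build unwinds cleanly */
		r->a = r->b = NULL;
	}

	static int res_alloc(struct res *r)
	{
		r->a = r->b = NULL;
		r->a = calloc(1, 64);
		if (!r->a)
			goto out;
		r->b = calloc(1, 64);
		if (!r->b)
			goto out;
		return 0;
	out:
		res_free(r);
		return -1;
	}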
-
-static void free_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		free_vmcs(per_cpu(vmxarea, cpu));
-		per_cpu(vmxarea, cpu) = NULL;
-	}
-}
-
-static __init int alloc_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct vmcs *vmcs;
-
-		vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
-		if (!vmcs) {
-			free_kvm_area();
-			return -ENOMEM;
-		}
-
-		/*
-		 * When eVMCS is enabled, alloc_vmcs_cpu() sets
-		 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
-		 * revision_id reported by MSR_IA32_VMX_BASIC.
-		 *
-		 * However, even though not explicitly documented by
-		 * TLFS, VMXArea passed as VMXON argument should
-		 * still be marked with revision_id reported by
-		 * physical CPU.
-		 */
-		if (static_branch_unlikely(&enable_evmcs))
-			vmcs->hdr.revision_id = vmcs_config.revision_id;
-
-		per_cpu(vmxarea, cpu) = vmcs;
-	}
-	return 0;
-}
-
-static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
-			  struct kvm_segment *save)
-{
-	if (!emulate_invalid_guest_state) {
-		/*
-		 * CS and SS RPL should be equal during guest entry according
-		 * to VMX spec, but in reality it is not always so. Since vcpu
-		 * is in the middle of the transition from real mode to
-		 * protected mode it is safe to assume that RPL 0 is a good
-		 * default value.
-		 */
-		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
-			save->selector &= ~SEGMENT_RPL_MASK;
-		save->dpl = save->selector & SEGMENT_RPL_MASK;
-		save->s = 1;
-	}
-	vmx_set_segment(vcpu, save, seg);
-}
-
-static void enter_pmode(struct kvm_vcpu *vcpu)
-{
-	unsigned long flags;
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	/*
-	 * Update real mode segment cache. It may not be up-to-date if a
-	 * segment register was written while the vcpu was in guest mode.
-	 */
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
-
-	vmx->rmode.vm86_active = 0;
-
-	vmx_segment_cache_clear(vmx);
-
-	vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
-
-	flags = vmcs_readl(GUEST_RFLAGS);
-	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
-	flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
-	vmcs_writel(GUEST_RFLAGS, flags);
-
-	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
-			(vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
-
-	update_exception_bitmap(vcpu);
-
-	fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
-	fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
-	fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
-	fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
-	fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
-	fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
-}
-
-static void fix_rmode_seg(int seg, struct kvm_segment *save)
-{
-	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-	struct kvm_segment var = *save;
-
-	var.dpl = 0x3;
-	if (seg == VCPU_SREG_CS)
-		var.type = 0x3;
-
-	if (!emulate_invalid_guest_state) {
-		var.selector = var.base >> 4;
-		var.base = var.base & 0xffff0;
-		var.limit = 0xffff;
-		var.g = 0;
-		var.db = 0;
-		var.present = 1;
-		var.s = 1;
-		var.l = 0;
-		var.unusable = 0;
-		var.type = 0x3;
-		var.avl = 0;
-		if (save->base & 0xf)
-			printk_once(KERN_WARNING "kvm: segment base is not "
-					"paragraph aligned when entering "
-					"protected mode (seg=%d)", seg);
-	}
-
-	vmcs_write16(sf->selector, var.selector);
-	vmcs_writel(sf->base, var.base);
-	vmcs_write32(sf->limit, var.limit);
-	vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
-}
-
-static void enter_rmode(struct kvm_vcpu *vcpu)
-{
-	unsigned long flags;
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
-
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
-	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
-
-	vmx->rmode.vm86_active = 1;
-
-	/*
-	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-	 * vcpu. Warn the user that an update is overdue.
-	 */
-	if (!kvm_vmx->tss_addr)
-		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR needs to be "
-			     "called before entering vcpu\n");
-
-	vmx_segment_cache_clear(vmx);
-
-	vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
-	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
-	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
-
-	flags = vmcs_readl(GUEST_RFLAGS);
-	vmx->rmode.save_rflags = flags;
-
-	flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
-
-	vmcs_writel(GUEST_RFLAGS, flags);
-	vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
-	update_exception_bitmap(vcpu);
-
-	fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
-	fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
-	fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
-	fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
-	fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
-	fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
-
-	kvm_mmu_reset_context(vcpu);
-}
-
-void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
-
-	if (!msr)
-		return;
-
-	vcpu->arch.efer = efer;
-	if (efer & EFER_LMA) {
-		vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
-		msr->data = efer;
-	} else {
-		vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
-
-		msr->data = efer & ~EFER_LME;
-	}
-	setup_msrs(vmx);
-}
-
-#ifdef CONFIG_X86_64
-
-static void enter_lmode(struct kvm_vcpu *vcpu)
-{
-	u32 guest_tr_ar;
-
-	vmx_segment_cache_clear(to_vmx(vcpu));
-
-	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
-	if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
-		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
-				     __func__);
-		vmcs_write32(GUEST_TR_AR_BYTES,
-			     (guest_tr_ar & ~VMX_AR_TYPE_MASK)
-			     | VMX_AR_TYPE_BUSY_64_TSS);
-	}
-	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
-}
-
-static void exit_lmode(struct kvm_vcpu *vcpu)
-{
-	vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
-	vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
-}
-
-#endif
-
-static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
-{
-	int vpid = to_vmx(vcpu)->vpid;
-
-	if (!vpid_sync_vcpu_addr(vpid, addr))
-		vpid_sync_context(vpid);
-
-	/*
-	 * If VPIDs are not supported or enabled, then the above is a no-op.
-	 * But we don't really need a TLB flush in that case anyway, because
-	 * each VM entry/exit includes an implicit flush when VPID is 0.
-	 */
-}
-
-static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
-{
-	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
-
-	vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
-	vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
-}
-
-static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
-{
-	ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
-
-	vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
-	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
-}
-
-static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
-
-	if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
-		return;
-
-	if (is_pae_paging(vcpu)) {
-		vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
-		vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
-		vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
-		vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
-	}
-}
-
-void ept_save_pdptrs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
-
-	if (is_pae_paging(vcpu)) {
-		mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
-		mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
-		mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
-		mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
-	}
-
-	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
-}
-
-static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
-				       unsigned long cr0,
-				       struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
-		vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
-	if (!(cr0 & X86_CR0_PG)) {
-		/* From paging/starting to nonpaging */
-		exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
-					  CPU_BASED_CR3_STORE_EXITING);
-		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
-	} else if (!is_paging(vcpu)) {
-		/* From nonpaging to paging */
-		exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
-					    CPU_BASED_CR3_STORE_EXITING);
-		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
-	}
-
-	if (!(cr0 & X86_CR0_WP))
-		*hw_cr0 &= ~X86_CR0_WP;
-}
-
-void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long hw_cr0;
-
-	hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
-	if (enable_unrestricted_guest)
-		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
-	else {
-		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
-
-		if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
-			enter_pmode(vcpu);
-
-		if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
-			enter_rmode(vcpu);
-	}
-
-#ifdef CONFIG_X86_64
-	if (vcpu->arch.efer & EFER_LME) {
-		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
-			enter_lmode(vcpu);
-		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
-			exit_lmode(vcpu);
-	}
-#endif
-
-	if (enable_ept && !enable_unrestricted_guest)
-		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
-
-	vmcs_writel(CR0_READ_SHADOW, cr0);
-	vmcs_writel(GUEST_CR0, hw_cr0);
-	vcpu->arch.cr0 = cr0;
-
-	/* depends on vcpu->arch.cr0 to be set to a new value */
-	vmx->emulation_required = emulation_required(vcpu);
-}
-
-static int get_ept_level(struct kvm_vcpu *vcpu)
-{
-	if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
-		return 5;
-	return 4;
-}
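construct_eptp() below packs the EPT pointer exactly as the SDM lays it out: bits 2:0 hold the memory type (6 = write-back), bits 5:3 hold the page-walk length minus one, bit 6 enables accessed/dirty tracking, and the upper bits carry the 4K-aligned root table address. A worked standalone rendering:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t make_eptp(uint64_t root_hpa, int levels, int ad_bits)
	{
		uint64_t eptp = 6;			/* memory type: write-back */

		eptp |= (uint64_t)(levels - 1) << 3;	/* page-walk length - 1 */
		if (ad_bits)
			eptp |= 1ull << 6;		/* enable A/D flags */
		eptp |= root_hpa & ~0xfffull;		/* 4K-aligned root */
		return eptp;
	}

	int main(void)
	{
		/* 4-level walk, A/D on, root at 0x1000: 0x6|0x18|0x40|0x1000 */
		printf("%#llx\n", (unsigned long long)make_eptp(0x1000, 4, 1));
		return 0;	/* prints 0x105e */
	}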
-
-u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
-{
-	u64 eptp = VMX_EPTP_MT_WB;
-
-	eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
-
-	if (enable_ept_ad_bits &&
-	    (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
-		eptp |= VMX_EPTP_AD_ENABLE_BIT;
-	eptp |= (root_hpa & PAGE_MASK);
-
-	return eptp;
-}
-
-void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
-{
-	struct kvm *kvm = vcpu->kvm;
-	bool update_guest_cr3 = true;
-	unsigned long guest_cr3;
-	u64 eptp;
-
-	guest_cr3 = cr3;
-	if (enable_ept) {
-		eptp = construct_eptp(vcpu, cr3);
-		vmcs_write64(EPT_POINTER, eptp);
-
-		if (kvm_x86_ops->tlb_remote_flush) {
-			spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
-			to_vmx(vcpu)->ept_pointer = eptp;
-			to_kvm_vmx(kvm)->ept_pointers_match
-				= EPT_POINTERS_CHECK;
-			spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
-		}
-
-		/* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
-		if (is_guest_mode(vcpu))
-			update_guest_cr3 = false;
-		else if (!enable_unrestricted_guest && !is_paging(vcpu))
-			guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
-		else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
-			guest_cr3 = vcpu->arch.cr3;
-		else /* vmcs01.GUEST_CR3 is already up-to-date. */
-			update_guest_cr3 = false;
-		ept_load_pdptrs(vcpu);
-	}
-
-	if (update_guest_cr3)
-		vmcs_writel(GUEST_CR3, guest_cr3);
-}
-
-int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	/*
-	 * Pass through host's Machine Check Enable value to hw_cr4, which
-	 * is in force while we are in guest mode. Do not let guests control
-	 * this bit, even if host CR4.MCE == 0.
-	 */
-	unsigned long hw_cr4;
-
-	hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
-	if (enable_unrestricted_guest)
-		hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
-	else if (vmx->rmode.vm86_active)
-		hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
-	else
-		hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
-
-	if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
-		if (cr4 & X86_CR4_UMIP) {
-			secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
-			hw_cr4 &= ~X86_CR4_UMIP;
-		} else if (!is_guest_mode(vcpu) ||
-			!nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
-			secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
-		}
-	}
-
-	if (cr4 & X86_CR4_VMXE) {
-		/*
-		 * To use VMXON (and later other VMX instructions), a guest
-		 * must first be able to turn on cr4.VMXE (see handle_vmon()).
-		 * So basically the check on whether to allow nested VMX
-		 * is here. We operate under the default treatment of SMM,
-		 * so VMX cannot be enabled under SMM.
-		 */
-		if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
-			return 1;
-	}
-
-	if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
-		return 1;
-
-	vcpu->arch.cr4 = cr4;
-
-	if (!enable_unrestricted_guest) {
-		if (enable_ept) {
-			if (!is_paging(vcpu)) {
-				hw_cr4 &= ~X86_CR4_PAE;
-				hw_cr4 |= X86_CR4_PSE;
-			} else if (!(cr4 & X86_CR4_PAE)) {
-				hw_cr4 &= ~X86_CR4_PAE;
-			}
-		}
-
-		/*
-		 * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
-		 * hardware. To emulate this behavior, SMEP/SMAP/PKU needs
-		 * to be manually disabled when guest switches to non-paging
-		 * mode.
-		 *
-		 * If !enable_unrestricted_guest, the CPU is always running
-		 * with CR0.PG=1 and CR4 needs to be modified.
-		 * If enable_unrestricted_guest, the CPU automatically
-		 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
-		 */
-		if (!is_paging(vcpu))
-			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
-	}
-
-	vmcs_writel(CR4_READ_SHADOW, cr4);
-	vmcs_writel(GUEST_CR4, hw_cr4);
-	return 0;
-}
-
-void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 ar;
-
-	if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
-		*var = vmx->rmode.segs[seg];
-		if (seg == VCPU_SREG_TR
-		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
-			return;
-		var->base = vmx_read_guest_seg_base(vmx, seg);
-		var->selector = vmx_read_guest_seg_selector(vmx, seg);
-		return;
-	}
-	var->base = vmx_read_guest_seg_base(vmx, seg);
-	var->limit = vmx_read_guest_seg_limit(vmx, seg);
-	var->selector = vmx_read_guest_seg_selector(vmx, seg);
-	ar = vmx_read_guest_seg_ar(vmx, seg);
-	var->unusable = (ar >> 16) & 1;
-	var->type = ar & 15;
-	var->s = (ar >> 4) & 1;
-	var->dpl = (ar >> 5) & 3;
-	/*
-	 * Some userspaces do not preserve unusable property. Since usable
-	 * segment has to be present according to VMX spec we can use present
-	 * property to amend userspace bug by making unusable segment always
-	 * nonpresent. vmx_segment_access_rights() already marks nonpresent
-	 * segment as unusable.
-	 */
-	var->present = !var->unusable;
-	var->avl = (ar >> 12) & 1;
-	var->l = (ar >> 13) & 1;
-	var->db = (ar >> 14) & 1;
-	var->g = (ar >> 15) & 1;
-}
-
-static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
-{
-	struct kvm_segment s;
-
-	if (to_vmx(vcpu)->rmode.vm86_active) {
-		vmx_get_segment(vcpu, &s, seg);
-		return s.base;
-	}
-	return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
-}
-
-int vmx_get_cpl(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	if (unlikely(vmx->rmode.vm86_active))
-		return 0;
-	else {
-		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
-		return VMX_AR_DPL(ar);
-	}
-}
-
-static u32 vmx_segment_access_rights(struct kvm_segment *var)
-{
-	u32 ar;
-
-	if (var->unusable || !var->present)
-		ar = 1 << 16;
-	else {
-		ar = var->type & 15;
-		ar |= (var->s & 1) << 4;
-		ar |= (var->dpl & 3) << 5;
-		ar |= (var->present & 1) << 7;
-		ar |= (var->avl & 1) << 12;
-		ar |= (var->l & 1) << 13;
-		ar |= (var->db & 1) << 14;
-		ar |= (var->g & 1) << 15;
-	}
-
-	return ar;
-}
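vmx_segment_access_rights() above packs the kvm_segment fields into the VMX access-rights layout: type in bits 3:0, S in bit 4, DPL in bits 6:5, P in bit 7, AVL in bit 12, L in bit 13, D/B in bit 14, G in bit 15, with "unusable" short-circuiting to bit 16. As a worked example, a flat 32-bit ring-0 code segment (type 0xb, S=1, DPL=0, P=1, D/B=1, G=1) encodes as

	0xb | (1 << 4) | (0 << 5) | (1 << 7) | (1 << 14) | (1 << 15) = 0xc09b

while an unusable segment yields 1 << 16 = 0x10000 regardless of the other fields.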
-
-void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
-	vmx_segment_cache_clear(vmx);
-
-	if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
-		vmx->rmode.segs[seg] = *var;
-		if (seg == VCPU_SREG_TR)
-			vmcs_write16(sf->selector, var->selector);
-		else if (var->s)
-			fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
-		goto out;
-	}
-
-	vmcs_writel(sf->base, var->base);
-	vmcs_write32(sf->limit, var->limit);
-	vmcs_write16(sf->selector, var->selector);
-
-	/*
-	 * Fix the "Accessed" bit in AR field of segment registers for older
-	 * qemu binaries.
-	 * IA32 arch specifies that at the time of processor reset the
-	 * "Accessed" bit in the AR field of segment registers is 1. And qemu
-	 * is setting it to 0 in the userland code. This causes invalid guest
-	 * state vmexit when "unrestricted guest" mode is turned on.
-	 * Fix for this setup issue in cpu_reset is being pushed in the qemu
-	 * tree. Newer qemu binaries with that qemu fix would not need this
-	 * kvm hack.
-	 */
-	if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
-		var->type |= 0x1; /* Accessed */
-
-	vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
-
-out:
-	vmx->emulation_required = emulation_required(vcpu);
-}
-
-static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
-{
-	u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
-
-	*db = (ar >> 14) & 1;
-	*l = (ar >> 13) & 1;
-}
-
-static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
-{
-	dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
-	dt->address = vmcs_readl(GUEST_IDTR_BASE);
-}
-
-static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
-{
-	vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
-	vmcs_writel(GUEST_IDTR_BASE, dt->address);
-}
-
-static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
-{
-	dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
-	dt->address = vmcs_readl(GUEST_GDTR_BASE);
-}
-
-static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
-{
-	vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
-	vmcs_writel(GUEST_GDTR_BASE, dt->address);
-}
-
-static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
-{
-	struct kvm_segment var;
-	u32 ar;
-
-	vmx_get_segment(vcpu, &var, seg);
-	var.dpl = 0x3;
-	if (seg == VCPU_SREG_CS)
-		var.type = 0x3;
-	ar = vmx_segment_access_rights(&var);
-
-	if (var.base != (var.selector << 4))
-		return false;
-	if (var.limit != 0xffff)
-		return false;
-	if (ar != 0xf3)
-		return false;
-
-	return true;
-}
-
-static bool code_segment_valid(struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment cs;
-	unsigned int cs_rpl;
-
-	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-	cs_rpl = cs.selector & SEGMENT_RPL_MASK;
-
-	if (cs.unusable)
-		return false;
-	if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
-		return false;
-	if (!cs.s)
-		return false;
-	if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
-		if (cs.dpl > cs_rpl)
-			return false;
-	} else {
-		if (cs.dpl != cs_rpl)
-			return false;
-	}
-	if (!cs.present)
-		return false;
-
-	/* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
-	return true;
-}
-
-static bool stack_segment_valid(struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment ss;
-	unsigned int ss_rpl;
-
-	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
-	ss_rpl = ss.selector & SEGMENT_RPL_MASK;
-
-	if (ss.unusable)
-		return true;
-	if (ss.type != 3 && ss.type != 7)
-		return false;
-	if (!ss.s)
-		return false;
-	if (ss.dpl != ss_rpl) /* DPL != RPL */
-		return false;
-	if (!ss.present)
-		return false;
-
-	return true;
-}
-
-static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
-{
-	struct kvm_segment var;
-	unsigned int rpl;
-
-	vmx_get_segment(vcpu, &var, seg);
-	rpl = var.selector & SEGMENT_RPL_MASK;
-
-	if (var.unusable)
-		return true;
-	if (!var.s)
-		return false;
-	if (!var.present)
-		return false;
-	if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
-		if (var.dpl < rpl) /* DPL < RPL */
-			return false;
-	}
-
-	/* TODO: Add other members to kvm_segment_field to allow checking for other access
-	 * rights flags
-	 */
-	return true;
-}
-
-static bool tr_valid(struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment tr;
-
-	vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
-
-	if (tr.unusable)
-		return false;
-	if (tr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
-		return false;
-	if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
-		return false;
-	if (!tr.present)
-		return false;
-
-	return true;
-}
-
-static bool ldtr_valid(struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment ldtr;
-
-	vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
-
-	if (ldtr.unusable)
-		return true;
-	if (ldtr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
-		return false;
-	if (ldtr.type != 2)
-		return false;
-	if (!ldtr.present)
-		return false;
-
-	return true;
-}
-
-static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment cs, ss;
-
-	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
-
-	return ((cs.selector & SEGMENT_RPL_MASK) ==
-		 (ss.selector & SEGMENT_RPL_MASK));
-}
-
-/*
- * Check if guest state is valid. Returns true if valid, false if
- * not.
- * We assume that registers are always usable
- */
-static bool guest_state_valid(struct kvm_vcpu *vcpu)
-{
-	if (enable_unrestricted_guest)
-		return true;
-
-	/* real mode guest state checks */
-	if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
-		if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
-			return false;
-		if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
-			return false;
-		if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
-			return false;
-		if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
-			return false;
-		if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
-			return false;
-		if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
-			return false;
-	} else {
-		/* protected mode guest state checks */
-		if (!cs_ss_rpl_check(vcpu))
-			return false;
-		if (!code_segment_valid(vcpu))
-			return false;
-		if (!stack_segment_valid(vcpu))
-			return false;
-		if (!data_segment_valid(vcpu, VCPU_SREG_DS))
-			return false;
-		if (!data_segment_valid(vcpu, VCPU_SREG_ES))
-			return false;
-		if (!data_segment_valid(vcpu, VCPU_SREG_FS))
-			return false;
-		if (!data_segment_valid(vcpu, VCPU_SREG_GS))
-			return false;
-		if (!tr_valid(vcpu))
-			return false;
-		if (!ldtr_valid(vcpu))
-			return false;
-	}
-	/* TODO:
-	 * - Add checks on RIP
-	 * - Add checks on RFLAGS
-	 */
-
-	return true;
-}
-
-static int init_rmode_tss(struct kvm *kvm)
-{
-	gfn_t fn;
-	u16 data = 0;
-	int idx, r;
-
-	idx = srcu_read_lock(&kvm->srcu);
-	fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
-	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
-	data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
-	r = kvm_write_guest_page(kvm, fn++, &data,
-				 TSS_IOPB_BASE_OFFSET, sizeof(u16));
-	if (r < 0)
-		goto out;
-	r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
-	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
-	data = ~0;
-	r = kvm_write_guest_page(kvm, fn, &data,
-				 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
-				 sizeof(u8));
-out:
-	srcu_read_unlock(&kvm->srcu, idx);
-	return r;
-}
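init_rmode_identity_map() below fills one guest page with 1024 4 MB PSE entries so that guest-physical equals guest-virtual across the whole 32-bit space; each entry is just the frame base (i << 22) OR'd with the present/write/user/accessed/dirty/PSE bits. The same loop as a standalone sketch:

	#include <stdint.h>
	#include <stdio.h>

	/* x86 page-table flag bits used by the identity map */
	#define PRESENT		0x001
	#define RW		0x002
	#define USER		0x004
	#define ACCESSED	0x020
	#define DIRTY		0x040
	#define PSE		0x080

	int main(void)
	{
		uint32_t pde[1024];

		for (uint32_t i = 0; i < 1024; i++)	/* i << 22 == i * 4 MB */
			pde[i] = (i << 22) | PRESENT | RW | USER |
				 ACCESSED | DIRTY | PSE;
		printf("pde[1]=%#x\n", pde[1]);		/* pde[1]=0x4000e7 */
		return 0;
	}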
-
-static int init_rmode_identity_map(struct kvm *kvm)
-{
-	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
-	int i, idx, r = 0;
-	kvm_pfn_t identity_map_pfn;
-	u32 tmp;
-
-	/* Protect kvm_vmx->ept_identity_pagetable_done. */
-	mutex_lock(&kvm->slots_lock);
-
-	if (likely(kvm_vmx->ept_identity_pagetable_done))
-		goto out2;
-
-	if (!kvm_vmx->ept_identity_map_addr)
-		kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
-	identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
-
-	r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-				    kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
-	if (r < 0)
-		goto out2;
-
-	idx = srcu_read_lock(&kvm->srcu);
-	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
-	/* Set up identity-mapping pagetable for EPT in real mode */
-	for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
-		tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
-			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
-		r = kvm_write_guest_page(kvm, identity_map_pfn,
-				&tmp, i * sizeof(tmp), sizeof(tmp));
-		if (r < 0)
-			goto out;
-	}
-	kvm_vmx->ept_identity_pagetable_done = true;
-
-out:
-	srcu_read_unlock(&kvm->srcu, idx);
-
-out2:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
-static void seg_setup(int seg)
-{
-	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-	unsigned int ar;
-
-	vmcs_write16(sf->selector, 0);
-	vmcs_writel(sf->base, 0);
-	vmcs_write32(sf->limit, 0xffff);
-	ar = 0x93;
-	if (seg == VCPU_SREG_CS)
-		ar |= 0x08; /* code segment */
-
-	vmcs_write32(sf->ar_bytes, ar);
-}
-
-static int alloc_apic_access_page(struct kvm *kvm)
-{
-	struct page *page;
-	int r = 0;
-
-	mutex_lock(&kvm->slots_lock);
-	if (kvm->arch.apic_access_page_done)
-		goto out;
-	r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-				    APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
-	if (r)
-		goto out;
-
-	page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-	if (is_error_page(page)) {
-		r = -EFAULT;
-		goto out;
-	}
-
-	/*
-	 * Do not pin the page in memory, so that memory hot-unplug
-	 * is able to migrate it.
-	 */
-	put_page(page);
-	kvm->arch.apic_access_page_done = true;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
-int allocate_vpid(void)
-{
-	int vpid;
-
-	if (!enable_vpid)
-		return 0;
-	spin_lock(&vmx_vpid_lock);
-	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-	if (vpid < VMX_NR_VPIDS)
-		__set_bit(vpid, vmx_vpid_bitmap);
-	else
-		vpid = 0;
-	spin_unlock(&vmx_vpid_lock);
-	return vpid;
-}
-
-void free_vpid(int vpid)
-{
-	if (!enable_vpid || vpid == 0)
-		return;
-	spin_lock(&vmx_vpid_lock);
-	__clear_bit(vpid, vmx_vpid_bitmap);
-	spin_unlock(&vmx_vpid_lock);
-}
-
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
-							  u32 msr, int type)
-{
-	int f = sizeof(unsigned long);
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	if (static_branch_unlikely(&enable_evmcs))
-		evmcs_touch_msr_bitmap();
-
-	/*
-	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-	 * have the write-low and read-high bitmap offsets the wrong way round.
-	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-	 */
-	if (msr <= 0x1fff) {
-		if (type & MSR_TYPE_R)
-			/* read-low */
-			__clear_bit(msr, msr_bitmap + 0x000 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-low */
-			__clear_bit(msr, msr_bitmap + 0x800 / f);
-
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		if (type & MSR_TYPE_R)
-			/* read-high */
-			__clear_bit(msr, msr_bitmap + 0x400 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-high */
-			__clear_bit(msr, msr_bitmap + 0xc00 / f);
-
-	}
-}
-
-static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
-							 u32 msr, int type)
-{
-	int f = sizeof(unsigned long);
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	if (static_branch_unlikely(&enable_evmcs))
-		evmcs_touch_msr_bitmap();
-
-	/*
-	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-	 * have the write-low and read-high bitmap offsets the wrong way round.
-	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-	 */
-	if (msr <= 0x1fff) {
-		if (type & MSR_TYPE_R)
-			/* read-low */
-			__set_bit(msr, msr_bitmap + 0x000 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-low */
-			__set_bit(msr, msr_bitmap + 0x800 / f);
-
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		if (type & MSR_TYPE_R)
-			/* read-high */
-			__set_bit(msr, msr_bitmap + 0x400 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-high */
-			__set_bit(msr, msr_bitmap + 0xc00 / f);
-
-	}
-}
-
-static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
-						      u32 msr, int type, bool value)
-{
-	if (value)
-		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
-	else
-		vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
-}
-
-static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
-{
-	u8 mode = 0;
-
-	if (cpu_has_secondary_exec_ctrls() &&
-	    (secondary_exec_controls_get(to_vmx(vcpu)) &
-	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
-		mode |= MSR_BITMAP_MODE_X2APIC;
-		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
-			mode |= MSR_BITMAP_MODE_X2APIC_APICV;
-	}
-
-	return mode;
-}
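The 4K MSR bitmap these helpers edit is split into four 1K quarters: read-low at offset 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xc00, where "low" covers MSRs 0x00000000-0x00001fff and "high" covers 0xc0000000-0xc0001fff. A sketch that computes the bit a given (msr, write) pair lands on:

	#include <stdint.h>
	#include <stdio.h>

	/* Bit offset inside the 4K bitmap, or -1 if the MSR is uncontrollable. */
	static int msr_bitmap_bit(uint32_t msr, int is_write)
	{
		int base = is_write ? 0x800 : 0x000;	/* write vs. read half */

		if (msr <= 0x1fff)
			return base * 8 + msr;		/* "low" quarter */
		if (msr >= 0xc0000000 && msr <= 0xc0001fff)
			return (base + 0x400) * 8 + (msr & 0x1fff); /* "high" */
		return -1;
	}

	int main(void)
	{
		/* EFER (0xc0000080), write intercept: bit in the 0xc00 quarter */
		printf("%d\n", msr_bitmap_bit(0xc0000080, 1));	/* 24704 */
		return 0;
	}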
-
-static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
-					 u8 mode)
-{
-	int msr;
-
-	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
-		unsigned word = msr / BITS_PER_LONG;
-		msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
-		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
-	}
-
-	if (mode & MSR_BITMAP_MODE_X2APIC) {
-		/*
-		 * TPR reads and writes can be virtualized even if virtual interrupt
-		 * delivery is not in use.
-		 */
-		vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
-		if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
-			vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
-			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
-			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
-		}
-	}
-}
-
-void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	u8 mode = vmx_msr_bitmap_mode(vcpu);
-	u8 changed = mode ^ vmx->msr_bitmap_mode;
-
-	if (!changed)
-		return;
-
-	if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
-		vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
-
-	vmx->msr_bitmap_mode = mode;
-}
-
-void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
-{
-	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
-	u32 i;
-
-	vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS,
-				  MSR_TYPE_RW, flag);
-	vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE,
-				  MSR_TYPE_RW, flag);
-	vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK,
-				  MSR_TYPE_RW, flag);
-	vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH,
-				  MSR_TYPE_RW, flag);
-	for (i = 0; i < vmx->pt_desc.addr_range; i++) {
-		vmx_set_intercept_for_msr(msr_bitmap,
-			MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
-		vmx_set_intercept_for_msr(msr_bitmap,
-			MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
-	}
-}
-
-static bool vmx_get_enable_apicv(struct kvm *kvm)
-{
-	return enable_apicv;
-}
-
-static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	void *vapic_page;
-	u32 vppr;
-	int rvi;
-
-	if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
-	    !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
-	    WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
-		return false;
-
-	rvi = vmx_get_rvi();
-
-	vapic_page = vmx->nested.virtual_apic_map.hva;
-	vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
-
-	return ((rvi & 0xf0) > (vppr & 0xf0));
-}
-
-static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
-						     bool nested)
-{
-#ifdef CONFIG_SMP
-	int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
-
-	if (vcpu->mode == IN_GUEST_MODE) {
-		/*
-		 * The vector of interrupt to be delivered to vcpu had
-		 * been set in PIR before this function.
-		 *
-		 * Following cases will be reached in this block, and
-		 * we always send a notification event in all cases as
-		 * explained below.
-		 *
-		 * Case 1: vcpu keeps in non-root mode. Sending a
-		 * notification event posts the interrupt to vcpu.
-		 *
-		 * Case 2: vcpu exits to root mode and is still
-		 * runnable. PIR will be synced to vIRR before the
-		 * next vcpu entry. Sending a notification event in
-		 * this case has no effect, as vcpu is not in root
-		 * mode.
-		 *
-		 * Case 3: vcpu exits to root mode and is blocked.
-		 * vcpu_block() has already synced PIR to vIRR and
-		 * never blocks vcpu if vIRR is not cleared. Therefore,
-		 * a blocked vcpu here does not wait for any requested
-		 * interrupts in PIR, and sending a notification event
-		 * which has no effect is safe here.
-		 */
-
-		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
-		return true;
-	}
-#endif
-	return false;
-}
-
-static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
-						int vector)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	if (is_guest_mode(vcpu) &&
-	    vector == vmx->nested.posted_intr_nv) {
-		/*
-		 * If a posted intr is not recognized by hardware,
-		 * we will accomplish it in the next vmentry.
-		 */
-		vmx->nested.pi_pending = true;
-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		/* the PIR and ON have been set by L1. */
-		if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
-			kvm_vcpu_kick(vcpu);
-		return 0;
-	}
-	return -1;
-}
-/*
- * Send interrupt to vcpu via posted interrupt way.
- * 1. If target vcpu is running(non-root mode), send posted interrupt
- * notification to vcpu and hardware will sync PIR to vIRR atomically.
- * 2. If target vcpu isn't running(root mode), kick it to pick up the
- * interrupt from PIR in next vmentry.
- */
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	int r;
-
-	r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
-	if (!r)
-		return;
-
-	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
-		return;
-
-	/* If a previous notification has sent the IPI, nothing to do. */
-	if (pi_test_and_set_on(&vmx->pi_desc))
-		return;
-
-	if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
-		kvm_vcpu_kick(vcpu);
-}
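vmx_deliver_posted_interrupt() depends on ordering: the vector bit must be visible in the PIR before ON is set, and ON before the notification is sent, so any CPU that observes the notification also finds the pending vector. The shape of that protocol, sketched with C11 atomics (the descriptor layout here is illustrative, not the hardware-defined one):

	#include <stdatomic.h>
	#include <stdbool.h>

	struct pi_desc_sketch {
		atomic_ullong pir[4];	/* 256 pending-vector bits */
		atomic_uint on;		/* outstanding-notification flag */
	};

	/* Returns true if the caller still needs to send the notification. */
	static bool post_interrupt(struct pi_desc_sketch *pi, unsigned int vec)
	{
		atomic_fetch_or(&pi->pir[vec / 64], 1ull << (vec % 64));
		/* seq_cst RMWs order the PIR update before the ON update */
		return atomic_exchange(&pi->on, 1) == 0;
	}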
-
-/*
- * Set up the vmcs's constant host-state fields, i.e., host-state fields that
- * will not change in the lifetime of the guest.
- * Note that host-state that does change is set elsewhere. E.g., host-state
- * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
- */
-void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
-{
-	u32 low32, high32;
-	unsigned long tmpl;
-	unsigned long cr0, cr3, cr4;
-
-	cr0 = read_cr0();
-	WARN_ON(cr0 & X86_CR0_TS);
-	vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
-
-	/*
-	 * Save the most likely value for this task's CR3 in the VMCS.
-	 * We can't use __get_current_cr3_fast() because we're not atomic.
-	 */
-	cr3 = __read_cr3();
-	vmcs_writel(HOST_CR3, cr3);  /* 22.2.3  FIXME: shadow tables */
-	vmx->loaded_vmcs->host_state.cr3 = cr3;
-
-	/* Save the most likely value for this task's CR4 in the VMCS. */
-	cr4 = cr4_read_shadow();
-	vmcs_writel(HOST_CR4, cr4);  /* 22.2.3, 22.2.5 */
-	vmx->loaded_vmcs->host_state.cr4 = cr4;
-
-	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
-#ifdef CONFIG_X86_64
-	/*
-	 * Load null selectors, so we can avoid reloading them in
-	 * vmx_prepare_switch_to_host(), in case userspace uses
-	 * the null selectors too (the expected case).
-	 */
-	vmcs_write16(HOST_DS_SELECTOR, 0);
-	vmcs_write16(HOST_ES_SELECTOR, 0);
-#else
-	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-#endif
-	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
-
-	vmcs_writel(HOST_IDTR_BASE, host_idt_base);  /* 22.2.4 */
-
-	vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit);  /* 22.2.5 */
-
-	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
-	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
-	rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
-	vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);  /* 22.2.3 */
-
-	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
-		rdmsr(MSR_IA32_CR_PAT, low32, high32);
-		vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
-	}
-
-	if (cpu_has_load_ia32_efer())
-		vmcs_write64(HOST_IA32_EFER, host_efer);
-}
-
-void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
-{
-	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
-	if (enable_ept)
-		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
-	if (is_guest_mode(&vmx->vcpu))
-		vmx->vcpu.arch.cr4_guest_owned_bits &=
-			~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
-	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
-}
-
-u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
-{
-	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
-
-	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
-		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
-
-	if (!enable_vnmi)
-		pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
-
-	if (!enable_preemption_timer)
-		pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-
-	return pin_based_exec_ctrl;
-}
-
-static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
-	if (cpu_has_secondary_exec_ctrls()) {
-		if (kvm_vcpu_apicv_active(vcpu))
-			secondary_exec_controls_setbit(vmx,
-				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
-				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
-		else
-			secondary_exec_controls_clearbit(vmx,
-					SECONDARY_EXEC_APIC_REGISTER_VIRT |
-					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
-	}
-
-	if (cpu_has_vmx_msr_bitmap())
-		vmx_update_msr_bitmap(vcpu);
-}
-
-u32 vmx_exec_control(struct vcpu_vmx *vmx)
-{
-	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
-
-	if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
-		exec_control &= ~CPU_BASED_MOV_DR_EXITING;
-
-	if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
-		exec_control &= ~CPU_BASED_TPR_SHADOW;
-#ifdef CONFIG_X86_64
-		exec_control |= CPU_BASED_CR8_STORE_EXITING |
-				CPU_BASED_CR8_LOAD_EXITING;
-#endif
-	}
-	if (!enable_ept)
-		exec_control |= CPU_BASED_CR3_STORE_EXITING |
-				CPU_BASED_CR3_LOAD_EXITING  |
-				CPU_BASED_INVLPG_EXITING;
-	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
-		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
-				CPU_BASED_MONITOR_EXITING);
-	if (kvm_hlt_in_guest(vmx->vcpu.kvm))
-		exec_control &= ~CPU_BASED_HLT_EXITING;
-	return exec_control;
-}
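vmx_exec_control() above starts from the capability-derived baseline and then strips exits that this VM's policy has opted out of (MWAIT/MONITOR and HLT exiting, for instance). The same derive-then-mask pattern, standalone (bit positions are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	#define CTL_HLT_EXITING		(1u << 7)
	#define CTL_MWAIT_EXITING	(1u << 10)

	struct vm_policy {
		int hlt_in_guest, mwait_in_guest;
	};

	static uint32_t exec_control(uint32_t baseline, const struct vm_policy *p)
	{
		uint32_t ctl = baseline;

		if (p->mwait_in_guest)
			ctl &= ~CTL_MWAIT_EXITING;	/* guest MWAITs natively */
		if (p->hlt_in_guest)
			ctl &= ~CTL_HLT_EXITING;	/* guest HLTs natively */
		return ctl;
	}

	int main(void)
	{
		struct vm_policy p = { .hlt_in_guest = 1 };

		printf("%#x\n", exec_control(CTL_HLT_EXITING | CTL_MWAIT_EXITING, &p));
		return 0;	/* prints 0x400 */
	}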
enable_unrestricted_guest = 0; - } - if (!enable_unrestricted_guest) - exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; - if (kvm_pause_in_guest(vmx->vcpu.kvm)) - exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!kvm_vcpu_apicv_active(vcpu)) - exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - - /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, - * in vmx_set_cr4. */ - exec_control &= ~SECONDARY_EXEC_DESC; - - /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD - (handle_vmptrld). - We can NOT enable shadow_vmcs here because we don't have yet - a current VMCS12 - */ - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - - if (!enable_pml) - exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - - if (vmx_xsaves_supported()) { - /* Exposing XSAVES only when XSAVE is exposed */ - bool xsaves_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); - - vcpu->arch.xsaves_enabled = xsaves_enabled; - - if (!xsaves_enabled) - exec_control &= ~SECONDARY_EXEC_XSAVES; - - if (nested) { - if (xsaves_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_XSAVES; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_XSAVES; - } - } - - if (vmx_rdtscp_supported()) { - bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); - if (!rdtscp_enabled) - exec_control &= ~SECONDARY_EXEC_RDTSCP; - - if (nested) { - if (rdtscp_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDTSCP; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDTSCP; - } - } - - if (vmx_invpcid_supported()) { - /* Exposing INVPCID only when PCID is exposed */ - bool invpcid_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && - guest_cpuid_has(vcpu, X86_FEATURE_PCID); - - if (!invpcid_enabled) { - exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; - guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); - } - - if (nested) { - if (invpcid_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_INVPCID; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_ENABLE_INVPCID; - } - } - - if (vmx_rdrand_supported()) { - bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); - if (rdrand_enabled) - exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING; - - if (nested) { - if (rdrand_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDRAND_EXITING; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDRAND_EXITING; - } - } - - if (vmx_rdseed_supported()) { - bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); - if (rdseed_enabled) - exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING; - - if (nested) { - if (rdseed_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_RDSEED_EXITING; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_RDSEED_EXITING; - } - } - - if (vmx_waitpkg_supported()) { - bool waitpkg_enabled = - guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); - - if (!waitpkg_enabled) - exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - - if (nested) { - if (waitpkg_enabled) - vmx->nested.msrs.secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - else - vmx->nested.msrs.secondary_ctls_high &= - ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; - } - } - - vmx->secondary_exec_control = exec_control; -} - -static void ept_set_mmio_spte_mask(void) -{ - /* - * EPT Misconfigurations can be generated if the 
value of bits 2:0 - * of an EPT paging-structure entry is 110b (write/execute). - */ - kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE, 0); -} - -#define VMX_XSS_EXIT_BITMAP 0 - -/* - * Note that the initialization of the Guest-State Area of the VMCS is done - * in vmx_vcpu_reset(). - */ -static void init_vmcs(struct vcpu_vmx *vmx) -{ - if (nested) - nested_vmx_set_vmcs_shadowing_bitmap(); - - if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); - - vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ - - /* Control */ - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - - exec_controls_set(vmx, vmx_exec_control(vmx)); - - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - secondary_exec_controls_set(vmx, vmx->secondary_exec_control); - } - - if (kvm_vcpu_apicv_active(&vmx->vcpu)) { - vmcs_write64(EOI_EXIT_BITMAP0, 0); - vmcs_write64(EOI_EXIT_BITMAP1, 0); - vmcs_write64(EOI_EXIT_BITMAP2, 0); - vmcs_write64(EOI_EXIT_BITMAP3, 0); - - vmcs_write16(GUEST_INTR_STATUS, 0); - - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); - vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); - } - - if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { - vmcs_write32(PLE_GAP, ple_gap); - vmx->ple_window = ple_window; - vmx->ple_window_dirty = true; - } - - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); - vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ - - vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ - vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ - vmx_set_constant_host_state(vmx); - vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ - vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ - - if (cpu_has_vmx_vmfunc()) - vmcs_write64(VM_FUNCTION_CONTROL, 0); - - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); - - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) - vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); - - vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); - - /* 22.2.1, 20.8.1 */ - vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); - - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); - - set_cr4_guest_host_mask(vmx); - - if (vmx->vpid != 0) - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - - if (vmx_xsaves_supported()) - vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); - - if (enable_pml) { - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); - } - - if (cpu_has_vmx_encls_vmexit()) - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); - - if (pt_mode == PT_MODE_HOST_GUEST) { - memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc)); - /* Bits[6:0] are forced to 1, writes are ignored.
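Editor's note on the MMIO trick used by ept_set_mmio_spte_mask() above: write+execute without read (110b) is an architecturally illegal EPT permission combination, so stamping it into a not-present SPTE guarantees an EPT-misconfig exit on any guest access, which KVM then treats as MMIO. A minimal userspace sketch of the encoding check; the EPT_* macro names and is_mmio_spte() helper here are illustrative stand-ins, not the kernel's:

    /* Bit layout is architectural: bit 0 = read, bit 1 = write,
     * bit 2 = execute. Write/execute without read is never valid. */
    #include <stdint.h>
    #include <stdio.h>

    #define EPT_READ         (1ull << 0)
    #define EPT_WRITE        (1ull << 1)
    #define EPT_EXEC         (1ull << 2)
    #define EPT_RWX_MASK     (EPT_READ | EPT_WRITE | EPT_EXEC)
    #define EPT_MISCONFIG_WX (EPT_WRITE | EPT_EXEC)   /* 110b */

    static int is_mmio_spte(uint64_t spte)
    {
        /* An entry is an MMIO marker iff its RWX bits hold the
         * write+execute-without-read pattern. */
        return (spte & EPT_RWX_MASK) == EPT_MISCONFIG_WX;
    }

    int main(void)
    {
        printf("%d\n", is_mmio_spte(0x1000 | EPT_MISCONFIG_WX)); /* 1 */
        printf("%d\n", is_mmio_spte(0x1000 | EPT_RWX_MASK));     /* 0 */
        return 0;
    }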
*/ - vmx->pt_desc.guest.output_mask = 0x7F; - vmcs_write64(GUEST_IA32_RTIT_CTL, 0); - } -} - -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct msr_data apic_base_msr; - u64 cr0; - - vmx->rmode.vm86_active = 0; - vmx->spec_ctrl = 0; - - vmx->msr_ia32_umwait_control = 0; - - vcpu->arch.microcode_version = 0x100000000ULL; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - vmx->hv_deadline_tsc = -1; - kvm_set_cr8(vcpu, 0); - - if (!init_event) { - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(vcpu, &apic_base_msr); - } - - vmx_segment_cache_clear(vmx); - - seg_setup(VCPU_SREG_CS); - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); - - seg_setup(VCPU_SREG_DS); - seg_setup(VCPU_SREG_ES); - seg_setup(VCPU_SREG_FS); - seg_setup(VCPU_SREG_GS); - seg_setup(VCPU_SREG_SS); - - vmcs_write16(GUEST_TR_SELECTOR, 0); - vmcs_writel(GUEST_TR_BASE, 0); - vmcs_write32(GUEST_TR_LIMIT, 0xffff); - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); - - vmcs_write16(GUEST_LDTR_SELECTOR, 0); - vmcs_writel(GUEST_LDTR_BASE, 0); - vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); - vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); - - if (!init_event) { - vmcs_write32(GUEST_SYSENTER_CS, 0); - vmcs_writel(GUEST_SYSENTER_ESP, 0); - vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - } - - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0xfff0); - - vmcs_writel(GUEST_GDTR_BASE, 0); - vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); - - vmcs_writel(GUEST_IDTR_BASE, 0); - vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); - - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); - if (kvm_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, 0); - - setup_msrs(vmx); - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ - - if (cpu_has_vmx_tpr_shadow() && !init_event) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (cpu_need_tpr_shadow(vcpu)) - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - __pa(vcpu->arch.apic->regs)); - vmcs_write32(TPR_THRESHOLD, 0); - } - - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vmx->vcpu.arch.cr0 = cr0; - vmx_set_cr0(vcpu, cr0); /* enter rmode */ - vmx_set_cr4(vcpu, 0); - vmx_set_efer(vcpu, 0); - - update_exception_bitmap(vcpu); - - vpid_sync_context(vmx->vpid); - if (init_event) - vmx_clear_hlt(vcpu); -} - -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - if (!enable_vnmi || - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } - - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); -} - -static void vmx_inject_irq(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - uint32_t intr; - int irq = vcpu->arch.interrupt.nr; - - trace_kvm_inj_virq(irq); - - ++vcpu->stat.irq_injections; - if (vmx->rmode.vm86_active) { - int inc_eip = 0; - if (vcpu->arch.interrupt.soft) - inc_eip = vcpu->arch.event_exit_inst_len; - kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); - return; - } - intr = irq | INTR_INFO_VALID_MASK; - if (vcpu->arch.interrupt.soft) { - intr |= 
INTR_TYPE_SOFT_INTR; - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmx->vcpu.arch.event_exit_inst_len); - } else - intr |= INTR_TYPE_EXT_INTR; - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); - - vmx_clear_hlt(vcpu); -} - -static void vmx_inject_nmi(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!enable_vnmi) { - /* - * Tracking the NMI-blocked state in software is built upon - * finding the next open IRQ window. This, in turn, depends on - * well-behaving guests: They have to keep IRQs disabled at - * least as long as the NMI handler runs. Otherwise we may - * cause NMI nesting, maybe breaking the guest. But as this is - * highly unlikely, we can live with the residual risk. - */ - vmx->loaded_vmcs->soft_vnmi_blocked = 1; - vmx->loaded_vmcs->vnmi_blocked_time = 0; - } - - ++vcpu->stat.nmi_injections; - vmx->loaded_vmcs->nmi_known_unmasked = false; - - if (vmx->rmode.vm86_active) { - kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); - return; - } - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); - - vmx_clear_hlt(vcpu); -} - -bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool masked; - - if (!enable_vnmi) - return vmx->loaded_vmcs->soft_vnmi_blocked; - if (vmx->loaded_vmcs->nmi_known_unmasked) - return false; - masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; - vmx->loaded_vmcs->nmi_known_unmasked = !masked; - return masked; -} - -void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!enable_vnmi) { - if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { - vmx->loaded_vmcs->soft_vnmi_blocked = masked; - vmx->loaded_vmcs->vnmi_blocked_time = 0; - } - } else { - vmx->loaded_vmcs->nmi_known_unmasked = !masked; - if (masked) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - } -} - -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -{ - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - - if (!enable_vnmi && - to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) - return 0; - - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); -} - -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) -{ - return (!to_vmx(vcpu)->nested.nested_run_pending && - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); -} - -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) -{ - int ret; - - if (enable_unrestricted_guest) - return 0; - - ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, - PAGE_SIZE * 3); - if (ret) - return ret; - to_kvm_vmx(kvm)->tss_addr = addr; - return init_rmode_tss(kvm); -} - -static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) -{ - to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; - return 0; -} - -static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) -{ - switch (vec) { - case BP_VECTOR: - /* - * Update instruction length as we may reinject the exception - * from user space while in guest debugging mode. 
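The interruption-information word assembled by vmx_inject_irq() and vmx_inject_nmi() above follows the architectural VM-entry format: vector in bits 7:0, event type in bits 10:8, valid flag in bit 31. A small self-contained sketch (macro set abbreviated; the real encoding also carries an error-code-valid bit and exception types):

    #include <stdint.h>
    #include <stdio.h>

    #define INTR_INFO_VALID (1u << 31)
    #define INTR_TYPE_EXT   (0u << 8)   /* external interrupt */
    #define INTR_TYPE_NMI   (2u << 8)
    #define INTR_TYPE_SOFT  (4u << 8)   /* software interrupt */

    static uint32_t build_intr_info(uint8_t vector, uint32_t type)
    {
        return (uint32_t)vector | type | INTR_INFO_VALID;
    }

    int main(void)
    {
        /* NMI is always vector 2. */
        printf("0x%08x\n", build_intr_info(2, INTR_TYPE_NMI));
        printf("0x%08x\n", build_intr_info(0x30, INTR_TYPE_EXT));
        return 0;
    }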
- */ - to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - return false; - /* fall through */ - case DB_VECTOR: - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return false; - /* fall through */ - case DE_VECTOR: - case OF_VECTOR: - case BR_VECTOR: - case UD_VECTOR: - case DF_VECTOR: - case SS_VECTOR: - case GP_VECTOR: - case MF_VECTOR: - return true; - break; - } - return false; -} - -static int handle_rmode_exception(struct kvm_vcpu *vcpu, - int vec, u32 err_code) -{ - /* - * An instruction with the address-size override prefix (opcode 0x67) - * causes the #SS fault with error code 0 in VM86 mode. - */ - if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0)) { - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_vcpu_halt(vcpu); - } - return 1; - } - return 0; - } - - /* - * Forward all other exceptions that are valid in real mode. - * FIXME: Breaks guest debugging in real mode, needs to be fixed with - * the required debugging infrastructure rework. - */ - kvm_queue_exception(vcpu, vec); - return 1; -} - -/* - * Trigger machine check on the host. We assume all the MSRs are already set up - * by the CPU and that we still run on the same CPU as the MCE occurred on. - * We pass a fake environment to the machine check handler because we want - * the guest to always be treated like user space, no matter what context - * it used internally. - */ -static void kvm_machine_check(void) -{ -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) - struct pt_regs regs = { - .cs = 3, /* Fake ring 3 no matter what the guest ran on */ - .flags = X86_EFLAGS_IF, - }; - - do_machine_check(&regs, 0); -#endif -} - -static int handle_machine_check(struct kvm_vcpu *vcpu) -{ - /* handled by vmx_vcpu_run() */ - return 1; -} - -static int handle_exception_nmi(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_run *kvm_run = vcpu->run; - u32 intr_info, ex_no, error_code; - unsigned long cr2, rip, dr6; - u32 vect_info; - - vect_info = vmx->idt_vectoring_info; - intr_info = vmx->exit_intr_info; - - if (is_machine_check(intr_info) || is_nmi(intr_info)) - return 1; /* handled by handle_exception_nmi_irqoff() */ - - if (is_invalid_opcode(intr_info)) - return handle_ud(vcpu); - - error_code = 0; - if (intr_info & INTR_INFO_DELIVER_CODE_MASK) - error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - - if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { - WARN_ON_ONCE(!enable_vmware_backdoor); - - /* - * VMware backdoor emulation on #GP interception only handles - * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero - * error code on #GP. - */ - if (error_code) { - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; - } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); - } - - /* - * A #PF with PFEC.RSVD = 1 indicates the guest is accessing - * MMIO; it is better to report an internal error. - * See the comments in vmx_handle_exit.
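Why kvm_machine_check() above fakes cs = 3: x86 mode checks in the user_mode() style derive the privilege level from the RPL bits of the saved CS, so a CS of 3 makes the MCE handler treat the event as if it interrupted user space. A hedged sketch of that check; the struct and helper are stand-ins, not kernel definitions:

    #include <stdio.h>

    struct fake_regs { unsigned long cs; };

    static int looks_like_user_mode(const struct fake_regs *regs)
    {
        return (regs->cs & 3) == 3;   /* RPL 3 == user */
    }

    int main(void)
    {
        struct fake_regs guest  = { .cs = 3 };    /* what KVM fakes */
        struct fake_regs kernel = { .cs = 0x10 }; /* typical kernel CS */

        printf("%d %d\n", looks_like_user_mode(&guest),
               looks_like_user_mode(&kernel));    /* 1 0 */
        return 0;
    }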
- */ - if ((vect_info & VECTORING_INFO_VALID_MASK) && - !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; - vcpu->run->internal.ndata = 3; - vcpu->run->internal.data[0] = vect_info; - vcpu->run->internal.data[1] = intr_info; - vcpu->run->internal.data[2] = error_code; - return 0; - } - - if (is_page_fault(intr_info)) { - cr2 = vmcs_readl(EXIT_QUALIFICATION); - /* EPT won't cause page fault directly */ - WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); - return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); - } - - ex_no = intr_info & INTR_INFO_VECTOR_MASK; - - if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) - return handle_rmode_exception(vcpu, ex_no, error_code); - - switch (ex_no) { - case AC_VECTOR: - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); - return 1; - case DB_VECTOR: - dr6 = vmcs_readl(EXIT_QUALIFICATION); - if (!(vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= dr6 | DR6_RTM; - if (is_icebp(intr_info)) - WARN_ON(!skip_emulated_instruction(vcpu)); - - kvm_queue_exception(vcpu, DB_VECTOR); - return 1; - } - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; - kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); - /* fall through */ - case BP_VECTOR: - /* - * Update instruction length as we may reinject #BP from - * user space while in guest debugging mode. Reading it for - * #DB as well causes no harm, it is not used in that case. - */ - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_run->exit_reason = KVM_EXIT_DEBUG; - rip = kvm_rip_read(vcpu); - kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; - kvm_run->debug.arch.exception = ex_no; - break; - default: - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = ex_no; - kvm_run->ex.error_code = error_code; - break; - } - return 0; -} - -static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) -{ - ++vcpu->stat.irq_exits; - return 1; -} - -static int handle_triple_fault(struct kvm_vcpu *vcpu) -{ - vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; - vcpu->mmio_needed = 0; - return 0; -} - -static int handle_io(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - int size, in, string; - unsigned port; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - string = (exit_qualification & 16) != 0; - - ++vcpu->stat.io_exits; - - if (string) - return kvm_emulate_instruction(vcpu, 0); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - in = (exit_qualification & 8) != 0; - - return kvm_fast_pio(vcpu, size, port, in); -} - -static void -vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) -{ - /* - * Patch in the VMCALL instruction: - */ - hypercall[0] = 0x0f; - hypercall[1] = 0x01; - hypercall[2] = 0xc1; -} - -/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ -static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) -{ - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned long orig_val = val; - - /* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. So we first calculate the - * effective cr0 value that L1 would like to write into the - * hardware. 
It consists of the L2-owned bits from the new - * value combined with the L1-owned bits from L1's guest_cr0. - */ - val = (val & ~vmcs12->cr0_guest_host_mask) | - (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); - - if (!nested_guest_cr0_valid(vcpu, val)) - return 1; - - if (kvm_set_cr0(vcpu, val)) - return 1; - vmcs_writel(CR0_READ_SHADOW, orig_val); - return 0; - } else { - if (to_vmx(vcpu)->nested.vmxon && - !nested_host_cr0_valid(vcpu, val)) - return 1; - - return kvm_set_cr0(vcpu, val); - } -} - -static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) -{ - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - unsigned long orig_val = val; - - /* analogously to handle_set_cr0 */ - val = (val & ~vmcs12->cr4_guest_host_mask) | - (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); - if (kvm_set_cr4(vcpu, val)) - return 1; - vmcs_writel(CR4_READ_SHADOW, orig_val); - return 0; - } else - return kvm_set_cr4(vcpu, val); -} - -static int handle_desc(struct kvm_vcpu *vcpu) -{ - WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_cr(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification, val; - int cr; - int reg; - int err; - int ret; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - cr = exit_qualification & 15; - reg = (exit_qualification >> 8) & 15; - switch ((exit_qualification >> 4) & 3) { - case 0: /* mov to cr */ - val = kvm_register_readl(vcpu, reg); - trace_kvm_cr_write(cr, val); - switch (cr) { - case 0: - err = handle_set_cr0(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 3: - WARN_ON_ONCE(enable_unrestricted_guest); - err = kvm_set_cr3(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 4: - err = handle_set_cr4(vcpu, val); - return kvm_complete_insn_gp(vcpu, err); - case 8: { - u8 cr8_prev = kvm_get_cr8(vcpu); - u8 cr8 = (u8)val; - err = kvm_set_cr8(vcpu, cr8); - ret = kvm_complete_insn_gp(vcpu, err); - if (lapic_in_kernel(vcpu)) - return ret; - if (cr8_prev <= cr8) - return ret; - /* - * TODO: we might be squashing a - * KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. 
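The masking rule in handle_set_cr0()/handle_set_cr4() above is worth a worked example: for every bit set in the guest/host mask the owner's (L1's) value wins, and for every clear bit the value the guest just wrote wins. A sketch with made-up bit values:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t combine(uint64_t new_val, uint64_t owner_val,
                            uint64_t mask)
    {
        return (new_val & ~mask) | (owner_val & mask);
    }

    int main(void)
    {
        uint64_t mask   = 0x1;      /* say L1 owns only CR0.PE */
        uint64_t l1_cr0 = 0x11;     /* L1 wants PE | ET */
        uint64_t l2_val = 0x10002;  /* value L2 tried to write */

        /* PE comes from L1, everything else from the new value. */
        printf("0x%llx\n",
               (unsigned long long)combine(l2_val, l1_cr0, mask));
        return 0;                   /* prints 0x10003 */
    }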
- */ - vcpu->run->exit_reason = KVM_EXIT_SET_TPR; - return 0; - } - } - break; - case 2: /* clts */ - WARN_ONCE(1, "Guest should always own CR0.TS"); - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); - return kvm_skip_emulated_instruction(vcpu); - case 1: /*mov from cr*/ - switch (cr) { - case 3: - WARN_ON_ONCE(enable_unrestricted_guest); - val = kvm_read_cr3(vcpu); - kvm_register_write(vcpu, reg, val); - trace_kvm_cr_read(cr, val); - return kvm_skip_emulated_instruction(vcpu); - case 8: - val = kvm_get_cr8(vcpu); - kvm_register_write(vcpu, reg, val); - trace_kvm_cr_read(cr, val); - return kvm_skip_emulated_instruction(vcpu); - } - break; - case 3: /* lmsw */ - val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; - trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); - kvm_lmsw(vcpu, val); - - return kvm_skip_emulated_instruction(vcpu); - default: - break; - } - vcpu->run->exit_reason = 0; - vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", - (int)(exit_qualification >> 4) & 3, cr); - return 0; -} - -static int handle_dr(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - int dr, dr7, reg; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & DEBUG_REG_ACCESS_NUM; - - /* First, if DR does not exist, trigger UD */ - if (!kvm_require_dr(vcpu, dr)) - return 1; - - /* Do not handle if the CPL > 0, will trigger GP on re-entry */ - if (!kvm_require_cpl(vcpu, 0)) - return 1; - dr7 = vmcs_readl(GUEST_DR7); - if (dr7 & DR7_GD) { - /* - * As the vm-exit takes precedence over the debug trap, we - * need to emulate the latter, either for the host or the - * guest debugging itself. - */ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; - vcpu->run->debug.arch.dr7 = dr7; - vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); - vcpu->run->debug.arch.exception = DB_VECTOR; - vcpu->run->exit_reason = KVM_EXIT_DEBUG; - return 0; - } else { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - return 1; - } - } - - if (vcpu->guest_debug == 0) { - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); - - /* - * No more DR vmexits; force a reload of the debug registers - * and reenter on this instruction. The next vmexit will - * retrieve the full state of the debug registers. 
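For reference, the exit-qualification decode used by handle_dr() above: DR number in bits 2:0, direction in bit 4 (1 = MOV from DR), and the GPR operand in bits 11:8, per the SDM. A runnable sketch with a hypothetical qualification value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t qual = 0x316;   /* hypothetical exit qualification */

        unsigned dr      = qual & 0x7;
        int      from_dr = (qual >> 4) & 1;
        unsigned gpr     = (qual >> 8) & 0xf;

        printf("dr%u, %s, reg %u\n", dr,
               from_dr ? "mov-from-dr" : "mov-to-dr", gpr);
        return 0;                /* prints: dr6, mov-from-dr, reg 3 */
    }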
- */ - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; - return 1; - } - - reg = DEBUG_REG_ACCESS_REG(exit_qualification); - if (exit_qualification & TYPE_MOV_FROM_DR) { - unsigned long val; - - if (kvm_get_dr(vcpu, dr, &val)) - return 1; - kvm_register_write(vcpu, reg, val); - } else - if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) - return 1; - - return kvm_skip_emulated_instruction(vcpu); -} - -static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.dr6; -} - -static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) -{ -} - -static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) -{ - get_debugreg(vcpu->arch.db[0], 0); - get_debugreg(vcpu->arch.db[1], 1); - get_debugreg(vcpu->arch.db[2], 2); - get_debugreg(vcpu->arch.db[3], 3); - get_debugreg(vcpu->arch.dr6, 6); - vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); - - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); -} - -static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) -{ - vmcs_writel(GUEST_DR7, val); -} - -static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) -{ - kvm_apic_update_ppr(vcpu); - return 1; -} - -static int handle_interrupt_window(struct kvm_vcpu *vcpu) -{ - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); - - kvm_make_request(KVM_REQ_EVENT, vcpu); - - ++vcpu->stat.irq_window_exits; - return 1; -} - -static int handle_vmcall(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_hypercall(vcpu); -} - -static int handle_invd(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_invlpg(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - kvm_mmu_invlpg(vcpu, exit_qualification); - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_rdpmc(struct kvm_vcpu *vcpu) -{ - int err; - - err = kvm_rdpmc(vcpu); - return kvm_complete_insn_gp(vcpu, err); -} - -static int handle_wbinvd(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_wbinvd(vcpu); -} - -static int handle_xsetbv(struct kvm_vcpu *vcpu) -{ - u64 new_bv = kvm_read_edx_eax(vcpu); - u32 index = kvm_rcx_read(vcpu); - - if (kvm_set_xcr(vcpu, index, new_bv) == 0) - return kvm_skip_emulated_instruction(vcpu); - return 1; -} - -static int handle_apic_access(struct kvm_vcpu *vcpu) -{ - if (likely(fasteoi)) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - int access_type, offset; - - access_type = exit_qualification & APIC_ACCESS_TYPE; - offset = exit_qualification & APIC_ACCESS_OFFSET; - /* - * A sane guest uses MOV to write the EOI, and the written value - * is ignored. So short-circuit here to avoid - * heavy instruction emulation.
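The kvm_read_edx_eax() call in handle_xsetbv() above reflects how XSETBV passes its payload: the 64-bit XCR value is split across edx:eax, with the XCR index in ecx. A sketch of the composition (the helper name is borrowed for illustration):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t read_edx_eax(uint32_t edx, uint32_t eax)
    {
        return ((uint64_t)edx << 32) | eax;
    }

    int main(void)
    {
        /* XCR0 enabling x87+SSE+AVX would arrive as eax=0x7, edx=0. */
        printf("0x%llx\n", (unsigned long long)read_edx_eax(0, 0x7));
        return 0;
    }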
- */ - if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && - (offset == APIC_EOI)) { - kvm_lapic_set_eoi(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - } - return kvm_emulate_instruction(vcpu, 0); -} - -static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - int vector = exit_qualification & 0xff; - - /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ - kvm_apic_set_eoi_accelerated(vcpu, vector); - return 1; -} - -static int handle_apic_write(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 offset = exit_qualification & 0xfff; - - /* APIC-write VM exit is trap-like and thus no need to adjust IP */ - kvm_apic_write_nodecode(vcpu, offset); - return 1; -} - -static int handle_task_switch(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long exit_qualification; - bool has_error_code = false; - u32 error_code = 0; - u16 tss_selector; - int reason, type, idt_v, idt_index; - - idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); - idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); - type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && idt_v) { - switch (type) { - case INTR_TYPE_NMI_INTR: - vcpu->arch.nmi_injected = false; - vmx_set_nmi_mask(vcpu, true); - break; - case INTR_TYPE_EXT_INTR: - case INTR_TYPE_SOFT_INTR: - kvm_clear_interrupt_queue(vcpu); - break; - case INTR_TYPE_HARD_EXCEPTION: - if (vmx->idt_vectoring_info & - VECTORING_INFO_DELIVER_CODE_MASK) { - has_error_code = true; - error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); - } - /* fall through */ - case INTR_TYPE_SOFT_EXCEPTION: - kvm_clear_exception_queue(vcpu); - break; - default: - break; - } - } - tss_selector = exit_qualification; - - if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && - type != INTR_TYPE_EXT_INTR && - type != INTR_TYPE_NMI_INTR)) - WARN_ON(!skip_emulated_instruction(vcpu)); - - /* - * TODO: What about debug traps on tss switch? - * Are we supposed to inject them and update dr6? - */ - return kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? idt_index : -1, - reason, has_error_code, error_code); -} - -static int handle_ept_violation(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - gpa_t gpa; - u64 error_code; - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - /* - * EPT violation happened while executing iret from NMI, - * "blocked by NMI" bit has to be set before next VM entry. - * There are errata that may cause this bit to not be set: - * AAK134, BY25. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - enable_vnmi && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - trace_kvm_page_fault(gpa, exit_qualification); - - /* Is it a read fault? */ - error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) - ? PFERR_USER_MASK : 0; - /* Is it a write fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) - ? PFERR_WRITE_MASK : 0; - /* Is it a fetch fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) - ? PFERR_FETCH_MASK : 0; - /* ept page table entry is present? 
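The decode assembled at this point in handle_ept_violation() maps exit-qualification access bits directly onto page-fault error-code flags. An illustrative stand-alone version (the QUAL_* names are stand-ins for the kernel's EPT_VIOLATION_* constants; bit numbers per the SDM):

    #include <stdint.h>
    #include <stdio.h>

    #define QUAL_READ     (1ull << 0)
    #define QUAL_WRITE    (1ull << 1)
    #define QUAL_FETCH    (1ull << 2)
    #define QUAL_PERM_ANY (7ull << 3)  /* entry was R, W or X */

    int main(void)
    {
        uint64_t qual = QUAL_WRITE;    /* write to a non-present page */

        printf("read=%d write=%d fetch=%d present=%d\n",
               !!(qual & QUAL_READ), !!(qual & QUAL_WRITE),
               !!(qual & QUAL_FETCH), !!(qual & QUAL_PERM_ANY));
        return 0;
    }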
*/ - error_code |= (exit_qualification & - (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | - EPT_VIOLATION_EXECUTABLE)) - ? PFERR_PRESENT_MASK : 0; - - error_code |= (exit_qualification & 0x100) != 0 ? - PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; - - vcpu->arch.exit_qualification = exit_qualification; - return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); -} - -static int handle_ept_misconfig(struct kvm_vcpu *vcpu) -{ - gpa_t gpa; - - /* - * A nested guest cannot optimize MMIO vmexits, because we have an - * nGPA here instead of the required GPA. - */ - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!is_guest_mode(vcpu) && - !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { - trace_kvm_fast_mmio(gpa); - return kvm_skip_emulated_instruction(vcpu); - } - - return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); -} - -static int handle_nmi_window(struct kvm_vcpu *vcpu) -{ - WARN_ON_ONCE(!enable_vnmi); - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); - ++vcpu->stat.nmi_window_exits; - kvm_make_request(KVM_REQ_EVENT, vcpu); - - return 1; -} - -static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - bool intr_window_requested; - unsigned count = 130; - - /* - * We should never reach the point where we are emulating L2 - * due to invalid guest state as that means we incorrectly - * allowed a nested VMEntry with an invalid vmcs12. - */ - WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); - - intr_window_requested = exec_controls_get(vmx) & - CPU_BASED_INTR_WINDOW_EXITING; - - while (vmx->emulation_required && count-- != 0) { - if (intr_window_requested && vmx_interrupt_allowed(vcpu)) - return handle_interrupt_window(&vmx->vcpu); - - if (kvm_test_request(KVM_REQ_EVENT, vcpu)) - return 1; - - if (!kvm_emulate_instruction(vcpu, 0)) - return 0; - - if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } - - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_vcpu_halt(vcpu); - } - - /* - * Note, return 1 and not 0, vcpu_run() is responsible for - * morphing the pending signal into the proper return code. - */ - if (signal_pending(current)) - return 1; - - if (need_resched()) - schedule(); - } - - return 1; -} - -static void grow_ple_window(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned int old = vmx->ple_window; - - vmx->ple_window = __grow_ple_window(old, ple_window, - ple_window_grow, - ple_window_max); - - if (vmx->ple_window != old) { - vmx->ple_window_dirty = true; - trace_kvm_ple_window_update(vcpu->vcpu_id, - vmx->ple_window, old); - } -} - -static void shrink_ple_window(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned int old = vmx->ple_window; - - vmx->ple_window = __shrink_ple_window(old, ple_window, - ple_window_shrink, - ple_window); - - if (vmx->ple_window != old) { - vmx->ple_window_dirty = true; - trace_kvm_ple_window_update(vcpu->vcpu_id, - vmx->ple_window, old); - } -} - -/* - * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 
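For the grow_ple_window()/shrink_ple_window() pair above: the shared helpers scale the window multiplicatively for small modifiers and additively for large ones, clamped to a bound. This sketch paraphrases __grow_ple_window() from memory; treat the exact semantics as an assumption and defer to the common x86 code:

    #include <stdio.h>

    static unsigned int grow(unsigned int val, unsigned int base,
                             unsigned int mod, unsigned int max)
    {
        unsigned long long ret = val;

        if (mod < 1)
            return base;
        ret = (mod < base) ? ret * mod : ret + mod;
        return ret > max ? max : (unsigned int)ret;
    }

    int main(void)
    {
        unsigned int w = 4096;              /* default ple_window */

        w = grow(w, 4096, 2, 1u << 30);     /* doubles per PAUSE exit */
        printf("%u\n", w);                  /* 8192 */
        return 0;
    }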
- */ -static void wakeup_handler(void) -{ - struct kvm_vcpu *vcpu; - int cpu = smp_processor_id(); - - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), - blocked_vcpu_list) { - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (pi_test_on(pi_desc) == 1) - kvm_vcpu_kick(vcpu); - } - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); -} - -static void vmx_enable_tdp(void) -{ - kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, - enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, - enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK, - cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, - VMX_EPT_RWX_MASK, 0ull); - - ept_set_mmio_spte_mask(); - kvm_enable_tdp(); -} - -/* - * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE - * exiting, so only get here on cpu with PAUSE-Loop-Exiting. - */ -static int handle_pause(struct kvm_vcpu *vcpu) -{ - if (!kvm_pause_in_guest(vcpu->kvm)) - grow_ple_window(vcpu); - - /* - * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" - * VM-execution control is ignored if CPL > 0. OTOH, KVM - * never set PAUSE_EXITING and just set PLE if supported, - * so the vcpu must be CPL=0 if it gets a PAUSE exit. - */ - kvm_vcpu_on_spin(vcpu, true); - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_nop(struct kvm_vcpu *vcpu) -{ - return kvm_skip_emulated_instruction(vcpu); -} - -static int handle_mwait(struct kvm_vcpu *vcpu) -{ - printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); - return handle_nop(vcpu); -} - -static int handle_invalid_op(struct kvm_vcpu *vcpu) -{ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -static int handle_monitor_trap(struct kvm_vcpu *vcpu) -{ - return 1; -} - -static int handle_monitor(struct kvm_vcpu *vcpu) -{ - printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); - return handle_nop(vcpu); -} - -static int handle_invpcid(struct kvm_vcpu *vcpu) -{ - u32 vmx_instruction_info; - unsigned long type; - bool pcid_enabled; - gva_t gva; - struct x86_exception e; - unsigned i; - unsigned long roots_to_free = 0; - struct { - u64 pcid; - u64 gla; - } operand; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } - - vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - - if (type > 3) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - /* According to the Intel instruction reference, the memory operand - * is read even if it isn't needed (e.g., for type==all) - */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, - sizeof(operand), &gva)) - return 1; - - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); - return 1; - } - - if (operand.pcid >> 12 != 0) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); - - switch (type) { - case INVPCID_TYPE_INDIV_ADDR: - if ((!pcid_enabled && (operand.pcid != 0)) || - is_noncanonical_address(operand.gla, vcpu)) { - kvm_inject_gp(vcpu, 0); - return 1; - } - kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); - return kvm_skip_emulated_instruction(vcpu); - - case INVPCID_TYPE_SINGLE_CTXT: - if (!pcid_enabled && (operand.pcid != 0)) { - kvm_inject_gp(vcpu, 0); - return 1; - } - - if (kvm_get_active_pcid(vcpu) == operand.pcid) { - 
kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3) - == operand.pcid) - roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); - - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free); - /* - * If neither the current cr3 nor any of the prev_roots use the - * given PCID, then nothing needs to be done here because a - * resync will happen anyway before switching to any other CR3. - */ - - return kvm_skip_emulated_instruction(vcpu); - - case INVPCID_TYPE_ALL_NON_GLOBAL: - /* - * Currently, KVM doesn't mark global entries in the shadow - * page tables, so a non-global flush just degenerates to a - * global flush. If needed, we could optimize this later by - * keeping track of global entries in shadow page tables. - */ - - /* fall-through */ - case INVPCID_TYPE_ALL_INCL_GLOBAL: - kvm_mmu_unload(vcpu); - return kvm_skip_emulated_instruction(vcpu); - - default: - BUG(); /* We have already checked above that type <= 3 */ - } -} - -static int handle_pml_full(struct kvm_vcpu *vcpu) -{ - unsigned long exit_qualification; - - trace_kvm_pml_full(vcpu->vcpu_id); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - /* - * PML buffer FULL happened while executing iret from NMI, - * "blocked by NMI" bit has to be set before next VM entry. - */ - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - enable_vnmi && - (exit_qualification & INTR_INFO_UNBLOCK_NMI)) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - - /* - * PML buffer already flushed at beginning of VMEXIT. Nothing to do - * here, and there's no userspace involvement needed for PML. - */ - return 1; -} - -static int handle_preemption_timer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!vmx->req_immediate_exit && - !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) - kvm_lapic_expired_hv_timer(vcpu); - - return 1; -} - -/* - * When nested=0, all VMX instruction VM Exits land here. The handlers - * are overwritten by nested_vmx_setup() when nested=1. - */ -static int handle_vmx_instruction(struct kvm_vcpu *vcpu) -{ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -static int handle_encls(struct kvm_vcpu *vcpu) -{ - /* - * SGX virtualization is not yet supported. There is no software - * enable bit for SGX, so we have to trap ENCLS and inject a #UD - * to prevent the guest from executing ENCLS. - */ - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; -} - -/* - * The exit handlers return 1 if the exit was handled fully and guest execution - * may resume. Otherwise they set the kvm_run parameter to indicate what needs - * to be done to userspace and return 0.
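The jump table defined next follows a plain function-pointer dispatch pattern; here is a self-contained miniature of it. Reason numbers 12 and 30 are the architectural HLT and I/O-instruction exit codes; the handlers are dummies:

    #include <stdio.h>

    struct vcpu;                         /* opaque for the sketch */
    typedef int (*exit_handler_t)(struct vcpu *v);

    static int handle_hlt(struct vcpu *v) { (void)v; return 1; }
    static int handle_io(struct vcpu *v)  { (void)v; return 0; }

    static exit_handler_t handlers[] = {
        [12] = handle_hlt,               /* EXIT_REASON_HLT */
        [30] = handle_io,                /* EXIT_REASON_IO_INSTRUCTION */
    };

    static int dispatch(struct vcpu *v, unsigned reason)
    {
        if (reason < sizeof(handlers) / sizeof(handlers[0]) &&
            handlers[reason])
            return handlers[reason](v);  /* 1: resume guest */
        return 0;                        /* unexpected -> userspace */
    }

    int main(void)
    {
        printf("%d %d\n", dispatch(0, 12), dispatch(0, 30)); /* 1 0 */
        return 0;
    }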
- */ -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { - [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, - [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, - [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, - [EXIT_REASON_IO_INSTRUCTION] = handle_io, - [EXIT_REASON_CR_ACCESS] = handle_cr, - [EXIT_REASON_DR_ACCESS] = handle_dr, - [EXIT_REASON_CPUID] = kvm_emulate_cpuid, - [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, - [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, - [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window, - [EXIT_REASON_HLT] = kvm_emulate_halt, - [EXIT_REASON_INVD] = handle_invd, - [EXIT_REASON_INVLPG] = handle_invlpg, - [EXIT_REASON_RDPMC] = handle_rdpmc, - [EXIT_REASON_VMCALL] = handle_vmcall, - [EXIT_REASON_VMCLEAR] = handle_vmx_instruction, - [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction, - [EXIT_REASON_VMPTRLD] = handle_vmx_instruction, - [EXIT_REASON_VMPTRST] = handle_vmx_instruction, - [EXIT_REASON_VMREAD] = handle_vmx_instruction, - [EXIT_REASON_VMRESUME] = handle_vmx_instruction, - [EXIT_REASON_VMWRITE] = handle_vmx_instruction, - [EXIT_REASON_VMOFF] = handle_vmx_instruction, - [EXIT_REASON_VMON] = handle_vmx_instruction, - [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, - [EXIT_REASON_APIC_ACCESS] = handle_apic_access, - [EXIT_REASON_APIC_WRITE] = handle_apic_write, - [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, - [EXIT_REASON_WBINVD] = handle_wbinvd, - [EXIT_REASON_XSETBV] = handle_xsetbv, - [EXIT_REASON_TASK_SWITCH] = handle_task_switch, - [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, - [EXIT_REASON_GDTR_IDTR] = handle_desc, - [EXIT_REASON_LDTR_TR] = handle_desc, - [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, - [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, - [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, - [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, - [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, - [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, - [EXIT_REASON_INVEPT] = handle_vmx_instruction, - [EXIT_REASON_INVVPID] = handle_vmx_instruction, - [EXIT_REASON_RDRAND] = handle_invalid_op, - [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_PML_FULL] = handle_pml_full, - [EXIT_REASON_INVPCID] = handle_invpcid, - [EXIT_REASON_VMFUNC] = handle_vmx_instruction, - [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, - [EXIT_REASON_ENCLS] = handle_encls, -}; - -static const int kvm_vmx_max_exit_handlers = - ARRAY_SIZE(kvm_vmx_exit_handlers); - -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) -{ - *info1 = vmcs_readl(EXIT_QUALIFICATION); - *info2 = vmcs_read32(VM_EXIT_INTR_INFO); -} - -static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) -{ - if (vmx->pml_pg) { - __free_page(vmx->pml_pg); - vmx->pml_pg = NULL; - } -} - -static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 *pml_buf; - u16 pml_idx; - - pml_idx = vmcs_read16(GUEST_PML_INDEX); - - /* Do nothing if PML buffer is empty */ - if (pml_idx == (PML_ENTITY_NUM - 1)) - return; - - /* PML index always points to next available PML buffer entity */ - if (pml_idx >= PML_ENTITY_NUM) - pml_idx = 0; - else - pml_idx++; - - pml_buf = page_address(vmx->pml_pg); - for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { - u64 gpa; - - gpa = pml_buf[pml_idx]; - WARN_ON(gpa & (PAGE_SIZE - 1)); - kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); - } - - /* reset PML index */ - 
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); -} - -/* - * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap. - * Called before reporting dirty_bitmap to userspace. - */ -static void kvm_flush_pml_buffers(struct kvm *kvm) -{ - int i; - struct kvm_vcpu *vcpu; - /* - * We only need to kick each vcpu out of guest mode here, as the PML - * buffer is flushed at the beginning of every VMEXIT, so only - * vcpus running in guest mode can have unflushed GPAs in their PML - * buffers. - */ - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vcpu_kick(vcpu); -} - -static void vmx_dump_sel(char *name, uint32_t sel) -{ - pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", - name, vmcs_read16(sel), - vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), - vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), - vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); -} - -static void vmx_dump_dtsel(char *name, uint32_t limit) -{ - pr_err("%s limit=0x%08x, base=0x%016lx\n", - name, vmcs_read32(limit), - vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); -} - -void dump_vmcs(void) -{ - u32 vmentry_ctl, vmexit_ctl; - u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; - unsigned long cr4; - u64 efer; - int i, n; - - if (!dump_invalid_vmcs) { - pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); - return; - } - - vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); - vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); - cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); - cr4 = vmcs_readl(GUEST_CR4); - efer = vmcs_read64(GUEST_IA32_EFER); - secondary_exec_control = 0; - if (cpu_has_secondary_exec_ctrls()) - secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - - pr_err("*** Guest State ***\n"); - pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", - vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), - vmcs_readl(CR0_GUEST_HOST_MASK)); - pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", - cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); - pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && - (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) - { - pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", - vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); - pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", - vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); - } - pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", - vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); - pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", - vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", - vmcs_readl(GUEST_SYSENTER_ESP), - vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); - vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); - vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); - vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); - vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); - vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); - vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); - vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); - vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); - vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); - vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); - if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || - (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", - efer,
vmcs_read64(GUEST_IA32_PAT)); - pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", - vmcs_read64(GUEST_IA32_DEBUGCTL), - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); - if (cpu_has_load_perf_global_ctrl() && - vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016llx\n", - vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); - if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) - pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); - pr_err("Interruptibility = %08x ActivityState = %08x\n", - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), - vmcs_read32(GUEST_ACTIVITY_STATE)); - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) - pr_err("InterruptStatus = %04x\n", - vmcs_read16(GUEST_INTR_STATUS)); - - pr_err("*** Host State ***\n"); - pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", - vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); - pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", - vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), - vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), - vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), - vmcs_read16(HOST_TR_SELECTOR)); - pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", - vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), - vmcs_readl(HOST_TR_BASE)); - pr_err("GDTBase=%016lx IDTBase=%016lx\n", - vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); - pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", - vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), - vmcs_readl(HOST_CR4)); - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", - vmcs_readl(HOST_IA32_SYSENTER_ESP), - vmcs_read32(HOST_IA32_SYSENTER_CS), - vmcs_readl(HOST_IA32_SYSENTER_EIP)); - if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", - vmcs_read64(HOST_IA32_EFER), - vmcs_read64(HOST_IA32_PAT)); - if (cpu_has_load_perf_global_ctrl() && - vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016llx\n", - vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); - - pr_err("*** Control State ***\n"); - pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", - pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); - pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); - pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", - vmcs_read32(EXCEPTION_BITMAP), - vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), - vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); - pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), - vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), - vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); - pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_read32(VM_EXIT_INTR_ERROR_CODE), - vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); - pr_err(" reason=%08x qualification=%016lx\n", - vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); - pr_err("IDTVectoring: info=%08x errcode=%08x\n", - vmcs_read32(IDT_VECTORING_INFO_FIELD), - vmcs_read32(IDT_VECTORING_ERROR_CODE)); - pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); - if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) - pr_err("TSC Multiplier = 0x%016llx\n", - vmcs_read64(TSC_MULTIPLIER)); - if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) { - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { - u16 status = vmcs_read16(GUEST_INTR_STATUS); - pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); - } - pr_cont("TPR Threshold = 0x%02x\n", 
vmcs_read32(TPR_THRESHOLD)); - if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) - pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); - pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); - } - if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) - pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) - pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); - n = vmcs_read32(CR3_TARGET_COUNT); - for (i = 0; i + 1 < n; i += 4) - pr_err("CR3 target%u=%016lx target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), - i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); - if (i < n) - pr_err("CR3 target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); - if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) - pr_err("PLE Gap=%08x Window=%08x\n", - vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); - if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) - pr_err("Virtual processor ID = 0x%04x\n", - vmcs_read16(VIRTUAL_PROCESSOR_ID)); -} - -/* - * The guest has exited. See if we can fix it or if we need userspace - * assistance. - */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exit_reason = vmx->exit_reason; - u32 vectoring_info = vmx->idt_vectoring_info; - - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - - /* - * Flush the logged GPAs from the PML buffer; this keeps dirty_bitmap - * up to date. Another benefit: in kvm_vm_ioctl_get_dirty_log, before - * querying dirty_bitmap, we only need to kick all vcpus out of guest - * mode, since once a vcpu is in root mode its PML buffer must have been - * flushed already. - */ - if (enable_pml) - vmx_flush_pml_buffer(vcpu); - - /* If guest state is invalid, start emulating */ - if (vmx->emulation_required) - return handle_invalid_guest_state(vcpu); - - if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) - return nested_vmx_reflect_vmexit(vcpu, exit_reason); - - if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; - vcpu->run->fail_entry.hardware_entry_failure_reason - = exit_reason; - return 0; - } - - if (unlikely(vmx->fail)) { - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; - vcpu->run->fail_entry.hardware_entry_failure_reason - = vmcs_read32(VM_INSTRUCTION_ERROR); - return 0; - } - - /* - * Note: - * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it was caused by - * event delivery, since that indicates the guest is accessing MMIO. - * The VM-exit can be triggered again after returning to the guest, - * which would cause an infinite loop.
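On the userspace side, the KVM_EXIT_FAIL_ENTRY and KVM_EXIT_INTERNAL_ERROR values populated in this function surface through struct kvm_run from <linux/kvm.h>. A sketch of how a VMM's run loop might report them; the surrounding KVM_RUN ioctl plumbing is omitted and the kvm_run instance here is stack-built purely for demonstration:

    #include <linux/kvm.h>
    #include <stdio.h>

    static void report_exit(const struct kvm_run *run)
    {
        switch (run->exit_reason) {
        case KVM_EXIT_FAIL_ENTRY:
            fprintf(stderr, "VM-entry failed, hw reason 0x%llx\n",
                    (unsigned long long)
                    run->fail_entry.hardware_entry_failure_reason);
            break;
        case KVM_EXIT_INTERNAL_ERROR:
            fprintf(stderr, "internal error, suberror %u, %u data words\n",
                    run->internal.suberror, run->internal.ndata);
            break;
        }
    }

    int main(void)
    {
        struct kvm_run run = { .exit_reason = KVM_EXIT_FAIL_ENTRY };

        report_exit(&run);
        return 0;
    }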
- */ - if ((vectoring_info & VECTORING_INFO_VALID_MASK) && - (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION && - exit_reason != EXIT_REASON_PML_FULL && - exit_reason != EXIT_REASON_TASK_SWITCH)) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; - vcpu->run->internal.ndata = 3; - vcpu->run->internal.data[0] = vectoring_info; - vcpu->run->internal.data[1] = exit_reason; - vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; - if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { - vcpu->run->internal.ndata++; - vcpu->run->internal.data[3] = - vmcs_read64(GUEST_PHYSICAL_ADDRESS); - } - return 0; - } - - if (unlikely(!enable_vnmi && - vmx->loaded_vmcs->soft_vnmi_blocked)) { - if (vmx_interrupt_allowed(vcpu)) { - vmx->loaded_vmcs->soft_vnmi_blocked = 0; - } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && - vcpu->arch.nmi_pending) { - /* - * This CPU doesn't help us find the end of an - * NMI-blocked window if the guest runs with IRQs - * disabled. So we pull the trigger after 1 s of - * futile waiting, but inform the user about it. - */ - printk(KERN_WARNING "%s: Breaking out of NMI-blocked " - "state on VCPU %d after 1 s timeout\n", - __func__, vcpu->vcpu_id); - vmx->loaded_vmcs->soft_vnmi_blocked = 0; - } - } - - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); - return 1; - } else if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) { -#ifdef CONFIG_RETPOLINE - if (exit_reason == EXIT_REASON_MSR_WRITE) - return kvm_emulate_wrmsr(vcpu); - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) - return handle_preemption_timer(vcpu); - else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW) - return handle_interrupt_window(vcpu); - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - return handle_external_interrupt(vcpu); - else if (exit_reason == EXIT_REASON_HLT) - return kvm_emulate_halt(vcpu); - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) - return handle_ept_misconfig(vcpu); -#endif - return kvm_vmx_exit_handlers[exit_reason](vcpu); - } else { - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", - exit_reason); - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; - vcpu->run->internal.data[0] = exit_reason; - return 0; - } -} - -/* - * Software based L1D cache flush which is used when microcode providing - * the cache control MSR is not loaded. - * - * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but - * flushing it requires reading in 64 KiB because the replacement algorithm - * is not exactly LRU. This could be sized at runtime via topology - * information, but as all relevant affected CPUs have a 32 KiB L1D cache size, - * there is no point in doing so. - */ -static void vmx_l1d_flush(struct kvm_vcpu *vcpu) -{ - int size = PAGE_SIZE << L1D_CACHE_ORDER; - - /* - * This code is only executed when the flush mode is 'cond' or - * 'always'. - */ - if (static_branch_likely(&vmx_l1d_flush_cond)) { - bool flush_l1d; - - /* - * Clear the per-vcpu flush bit; it gets set again - * either from vcpu_run() or from one of the unsafe - * VMEXIT handlers. - */ - flush_l1d = vcpu->arch.l1tf_flush_l1d; - vcpu->arch.l1tf_flush_l1d = false; - - /* - * Clear the per-cpu flush bit; it gets set again from - * the interrupt handlers.
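A C rendering of the asm flush sequence just below may help readability. It is illustrative only: the real code must stay in asm (with a CPUID between the two passes as a serialization barrier) so no stray data accesses pollute the cache being displaced. Buffer size matches the 64 KiB figure in the comment above:

    #include <stddef.h>

    static void l1d_flush_sw(const volatile char *pages, size_t size)
    {
        size_t off;

        /* First pass: touch one byte per 4 KiB page so the whole
         * buffer is resident in the TLB. */
        for (off = 0; off < size; off += 4096)
            (void)pages[off];
        /* Second pass: read one byte per 64-byte line to displace
         * the L1D contents. */
        for (off = 0; off < size; off += 64)
            (void)pages[off];
    }

    int main(void)
    {
        static char buf[64 * 1024];   /* 64 KiB flush buffer */

        l1d_flush_sw(buf, sizeof(buf));
        return 0;
    }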
- */ - flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); - kvm_clear_cpu_l1tf_flush_l1d(); - - if (!flush_l1d) - return; - } - - vcpu->stat.l1d_flush++; - - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); - return; - } - - asm volatile( - /* First ensure the pages are in the TLB */ - "xorl %%eax, %%eax\n" - ".Lpopulate_tlb:\n\t" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $4096, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lpopulate_tlb\n\t" - "xorl %%eax, %%eax\n\t" - "cpuid\n\t" - /* Now fill the cache */ - "xorl %%eax, %%eax\n" - ".Lfill_cache:\n" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $64, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lfill_cache\n\t" - "lfence\n" - :: [flush_pages] "r" (vmx_l1d_flush_pages), - [size] "r" (size) - : "eax", "ebx", "ecx", "edx"); -} - -static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - int tpr_threshold; - - if (is_guest_mode(vcpu) && - nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) - return; - - tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr; - if (is_guest_mode(vcpu)) - to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; - else - vmcs_write32(TPR_THRESHOLD, tpr_threshold); -} - -void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 sec_exec_control; - - if (!lapic_in_kernel(vcpu)) - return; - - if (!flexpriority_enabled && - !cpu_has_vmx_virtualize_x2apic_mode()) - return; - - /* Postpone execution until vmcs01 is the current VMCS. */ - if (is_guest_mode(vcpu)) { - vmx->nested.change_vmcs01_virtual_apic_mode = true; - return; - } - - sec_exec_control = secondary_exec_controls_get(vmx); - sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); - - switch (kvm_get_apic_mode(vcpu)) { - case LAPIC_MODE_INVALID: - WARN_ONCE(true, "Invalid local APIC state"); - case LAPIC_MODE_DISABLED: - break; - case LAPIC_MODE_XAPIC: - if (flexpriority_enabled) { - sec_exec_control |= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb(vcpu, true); - } - break; - case LAPIC_MODE_X2APIC: - if (cpu_has_vmx_virtualize_x2apic_mode()) - sec_exec_control |= - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - break; - } - secondary_exec_controls_set(vmx, sec_exec_control); - - vmx_update_msr_bitmap(vcpu); -} - -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) -{ - if (!is_guest_mode(vcpu)) { - vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb(vcpu, true); - } -} - -static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) -{ - u16 status; - u8 old; - - if (max_isr == -1) - max_isr = 0; - - status = vmcs_read16(GUEST_INTR_STATUS); - old = status >> 8; - if (max_isr != old) { - status &= 0xff; - status |= max_isr << 8; - vmcs_write16(GUEST_INTR_STATUS, status); - } -} - -static void vmx_set_rvi(int vector) -{ - u16 status; - u8 old; - - if (vector == -1) - vector = 0; - - status = vmcs_read16(GUEST_INTR_STATUS); - old = (u8)status & 0xff; - if ((u8)vector != old) { - status &= ~0xff; - status |= (u8)vector; - vmcs_write16(GUEST_INTR_STATUS, status); - } -} - -static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) -{ - /* - * When running L2, updating RVI is only relevant when - * vmcs12 virtual-interrupt-delivery enabled. 
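
The two GUEST_INTR_STATUS helpers above poke at the same 16-bit VMCS field; per the SDM, RVI lives in the low byte and SVI in the high byte. A sketch of that packing (these helper names are ours, not the kernel's):

    /* GUEST_INTR_STATUS: bits 7:0 hold RVI, bits 15:8 hold SVI. */
    static inline u16 intr_status_pack(u8 svi, u8 rvi)
    {
            return ((u16)svi << 8) | rvi;
    }

    static inline u8 intr_status_rvi(u16 status) { return status & 0xff; }
    static inline u8 intr_status_svi(u16 status) { return status >> 8; }
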
- * However, it can be enabled only when L1 also - * intercepts external-interrupts and in that case - * we should not update vmcs02 RVI but instead intercept - * interrupt. Therefore, do nothing when running L2. - */ - if (!is_guest_mode(vcpu)) - vmx_set_rvi(max_irr); -} - -static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int max_irr; - bool max_irr_updated; - - WARN_ON(!vcpu->arch.apicv_active); - if (pi_test_on(&vmx->pi_desc)) { - pi_clear_on(&vmx->pi_desc); - /* - * IOMMU can write to PID.ON, so the barrier matters even on UP. - * But on x86 this is just a compiler barrier anyway. - */ - smp_mb__after_atomic(); - max_irr_updated = - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); - - /* - * If we are running L2 and L1 has a new pending interrupt - * which can be injected, we should re-evaluate - * what should be done with this new L1 interrupt. - * If L1 intercepts external-interrupts, we should - * exit from L2 to L1. Otherwise, interrupt should be - * delivered directly to L2. - */ - if (is_guest_mode(vcpu) && max_irr_updated) { - if (nested_exit_on_intr(vcpu)) - kvm_vcpu_exiting_guest_mode(vcpu); - else - kvm_make_request(KVM_REQ_EVENT, vcpu); - } - } else { - max_irr = kvm_lapic_find_highest_irr(vcpu); - } - vmx_hwapic_irr_update(vcpu, max_irr); - return max_irr; -} - -static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - return pi_test_on(pi_desc) || - (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); -} - -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) -{ - if (!kvm_vcpu_apicv_active(vcpu)) - return; - - vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); - vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); - vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); - vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); -} - -static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - pi_clear_on(&vmx->pi_desc); - memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); -} - -static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) -{ - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - /* if exit due to PF check for async PF */ - if (is_page_fault(vmx->exit_intr_info)) - vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); - - /* Handle machine checks before interrupts are enabled */ - if (is_machine_check(vmx->exit_intr_info)) - kvm_machine_check(); - - /* We need to handle NMIs before interrupts are enabled */ - if (is_nmi(vmx->exit_intr_info)) { - kvm_before_interrupt(&vmx->vcpu); - asm("int $2"); - kvm_after_interrupt(&vmx->vcpu); - } -} - -static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) -{ - unsigned int vector; - unsigned long entry; -#ifdef CONFIG_X86_64 - unsigned long tmp; -#endif - gate_desc *desc; - u32 intr_info; - - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - if (WARN_ONCE(!is_external_intr(intr_info), - "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) - return; - - vector = intr_info & INTR_INFO_VECTOR_MASK; - desc = (gate_desc *)host_idt_base + vector; - entry = gate_offset(desc); - - kvm_before_interrupt(vcpu); - - asm volatile( -#ifdef CONFIG_X86_64 - "mov %%" _ASM_SP ", %[sp]\n\t" - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" - "push $%c[ss]\n\t" - "push %[sp]\n\t" -#endif - "pushf\n\t" - __ASM_SIZE(push) " $%c[cs]\n\t" - CALL_NOSPEC - : -#ifdef CONFIG_X86_64 - [sp]"=&r"(tmp), -#endif - 
ASM_CALL_CONSTRAINT - : - THUNK_TARGET(entry), - [ss]"i"(__KERNEL_DS), - [cs]"i"(__KERNEL_CS) - ); - - kvm_after_interrupt(vcpu); -} -STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); - -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - handle_external_interrupt_irqoff(vcpu); - else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) - handle_exception_nmi_irqoff(vmx); - else if (!is_guest_mode(vcpu) && - vmx->exit_reason == EXIT_REASON_MSR_WRITE) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); -} - -static bool vmx_has_emulated_msr(int index) -{ - switch (index) { - case MSR_IA32_SMBASE: - /* - * We cannot do SMM unless we can run the guest in big - * real mode. - */ - return enable_unrestricted_guest || emulate_invalid_guest_state; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: - return nested; - case MSR_AMD64_VIRT_SPEC_CTRL: - /* This is AMD only. */ - return false; - default: - return true; - } -} - -static bool vmx_pt_supported(void) -{ - return pt_mode == PT_MODE_HOST_GUEST; -} - -static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) -{ - u32 exit_intr_info; - bool unblock_nmi; - u8 vector; - bool idtv_info_valid; - - idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; - - if (enable_vnmi) { - if (vmx->loaded_vmcs->nmi_known_unmasked) - return; - /* - * Can't use vmx->exit_intr_info since we're not sure what - * the exit reason is. - */ - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; - vector = exit_intr_info & INTR_INFO_VECTOR_MASK; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Re-set bit "block by NMI" before VM entry if vmexit caused by - * a guest IRET fault. - * SDM 3: 23.2.2 (September 2008) - * Bit 12 is undefined in any of the following cases: - * If the VM exit sets the valid bit in the IDT-vectoring - * information field. - * If the VM exit is due to a double fault. - */ - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && - vector != DF_VECTOR && !idtv_info_valid) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmx->loaded_vmcs->nmi_known_unmasked = - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) - & GUEST_INTR_STATE_NMI); - } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) - vmx->loaded_vmcs->vnmi_blocked_time += - ktime_to_ns(ktime_sub(ktime_get(), - vmx->loaded_vmcs->entry_time)); -} - -static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, - u32 idt_vectoring_info, - int instr_len_field, - int error_code_field) -{ - u8 vector; - int type; - bool idtv_info_valid; - - idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; - - vcpu->arch.nmi_injected = false; - kvm_clear_exception_queue(vcpu); - kvm_clear_interrupt_queue(vcpu); - - if (!idtv_info_valid) - return; - - kvm_make_request(KVM_REQ_EVENT, vcpu); - - vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; - type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; - - switch (type) { - case INTR_TYPE_NMI_INTR: - vcpu->arch.nmi_injected = true; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Clear bit "block by NMI" before VM entry if a NMI - * delivery faulted. 
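
The requeue switch here picks apart the IDT-vectoring information word; for reference, the field layout it relies on, written out as an illustrative fragment (the masks are the ones this file already uses, the local names are ours):

    u32  info   = idt_vectoring_info;
    bool valid  = info & VECTORING_INFO_VALID_MASK;         /* bit 31    */
    u8   vector = info & VECTORING_INFO_VECTOR_MASK;        /* bits 7:0  */
    u32  type   = info & VECTORING_INFO_TYPE_MASK;          /* bits 10:8 */
    bool has_ec = info & VECTORING_INFO_DELIVER_CODE_MASK;  /* bit 11    */
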
- */ - vmx_set_nmi_mask(vcpu, false); - break; - case INTR_TYPE_SOFT_EXCEPTION: - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ - case INTR_TYPE_HARD_EXCEPTION: - if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { - u32 err = vmcs_read32(error_code_field); - kvm_requeue_exception_e(vcpu, vector, err); - } else - kvm_requeue_exception(vcpu, vector); - break; - case INTR_TYPE_SOFT_INTR: - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ - case INTR_TYPE_EXT_INTR: - kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); - break; - default: - break; - } -} - -static void vmx_complete_interrupts(struct vcpu_vmx *vmx) -{ - __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, - VM_EXIT_INSTRUCTION_LEN, - IDT_VECTORING_ERROR_CODE); -} - -static void vmx_cancel_injection(struct kvm_vcpu *vcpu) -{ - __vmx_complete_interrupts(vcpu, - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), - VM_ENTRY_INSTRUCTION_LEN, - VM_ENTRY_EXCEPTION_ERROR_CODE); - - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); -} - -static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) -{ - int i, nr_msrs; - struct perf_guest_switch_msr *msrs; - - msrs = perf_guest_get_msrs(&nr_msrs); - - if (!msrs) - return; - - for (i = 0; i < nr_msrs; i++) - if (msrs[i].host == msrs[i].guest) - clear_atomic_switch_msr(vmx, msrs[i].msr); - else - add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, - msrs[i].host, false); -} - -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) -{ - u32 host_umwait_control; - - if (!vmx_has_waitpkg(vmx)) - return; - - host_umwait_control = get_umwait_control_msr(); - - if (vmx->msr_ia32_umwait_control != host_umwait_control) - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, - vmx->msr_ia32_umwait_control, - host_umwait_control, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); -} - -static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 tscl; - u32 delta_tsc; - - if (vmx->req_immediate_exit) { - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); - vmx->loaded_vmcs->hv_timer_soft_disabled = false; - } else if (vmx->hv_deadline_tsc != -1) { - tscl = rdtsc(); - if (vmx->hv_deadline_tsc > tscl) - /* set_hv_timer ensures the delta fits in 32-bits */ - delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> - cpu_preemption_timer_multi); - else - delta_tsc = 0; - - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); - vmx->loaded_vmcs->hv_timer_soft_disabled = false; - } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) { - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1); - vmx->loaded_vmcs->hv_timer_soft_disabled = true; - } -} - -void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) -{ - if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { - vmx->loaded_vmcs->host_state.rsp = host_rsp; - vmcs_writel(HOST_RSP, host_rsp); - } -} - -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); - -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr3, cr4; - - /* Record the guest's net vcpu time for enforced NMI injections. 
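
The atomic_switch_*() helpers above share one pattern: if the guest and host values agree, drop the VM-entry/VM-exit MSR-load entry entirely; otherwise program both directions. A condensed sketch in terms of the helpers this file already defines (the wrapper name is ours):

    static void switch_guest_msr(struct vcpu_vmx *vmx, u32 msr,
                                 u64 guest_val, u64 host_val)
    {
            if (guest_val == host_val)
                    clear_atomic_switch_msr(vmx, msr);  /* no autoload entry */
            else
                    add_atomic_switch_msr(vmx, msr, guest_val, host_val,
                                          false);  /* not entry-only: host
                                                    * value restored on exit */
    }
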
*/ - if (unlikely(!enable_vnmi && - vmx->loaded_vmcs->soft_vnmi_blocked)) - vmx->loaded_vmcs->entry_time = ktime_get(); - - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required) - return; - - if (vmx->ple_window_dirty) { - vmx->ple_window_dirty = false; - vmcs_write32(PLE_WINDOW, vmx->ple_window); - } - - if (vmx->nested.need_vmcs12_to_shadow_sync) - nested_sync_vmcs12_to_shadow(vcpu); - - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); - - cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { - vmcs_writel(HOST_CR3, cr3); - vmx->loaded_vmcs->host_state.cr3 = cr3; - } - - cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { - vmcs_writel(HOST_CR4, cr4); - vmx->loaded_vmcs->host_state.cr4 = cr4; - } - - /* When single-stepping over STI and MOV SS, we must clear the - * corresponding interruptibility bits in the guest state. Otherwise - * vmentry fails as it then expects bit 14 (BS) in pending debug - * exceptions being set, but that's not correct for the guest debugging - * case. */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmx_set_interrupt_shadow(vcpu, 0); - - kvm_load_guest_xsave_state(vcpu); - - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - - pt_guest_enter(vmx); - - atomic_switch_perf_msrs(vmx); - atomic_switch_umwait_control_msr(vmx); - - if (enable_preemption_timer) - vmx_update_hv_timer(vcpu); - - if (lapic_in_kernel(vcpu) && - vcpu->arch.apic->lapic_timer.timer_advance_ns) - kvm_wait_lapic_expire(vcpu); - - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - - /* L1D Flush includes CPU buffer clear to mitigate MDS */ - if (static_branch_unlikely(&vmx_l1d_should_flush)) - vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); - - if (vcpu->arch.cr2 != read_cr2()) - write_cr2(vcpu->arch.cr2); - - vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); - - vcpu->arch.cr2 = read_cr2(); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. 
- */ - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); - - /* All fields are clean at this point */ - if (static_branch_unlikely(&enable_evmcs)) - current_evmcs->hv_clean_fields |= - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - - if (static_branch_unlikely(&enable_evmcs)) - current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; - - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ - if (vmx->host_debugctlmsr) - update_debugctlmsr(vmx->host_debugctlmsr); - -#ifndef CONFIG_X86_64 - /* - * The sysexit path does not restore ds/es, so we must set them to - * a reasonable value ourselves. - * - * We can't defer this to vmx_prepare_switch_to_host() since that - * function may be executed in interrupt context, which saves and - * restore segments around it, nullifying its effect. - */ - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); -#endif - - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) - | (1 << VCPU_EXREG_RFLAGS) - | (1 << VCPU_EXREG_PDPTR) - | (1 << VCPU_EXREG_SEGMENTS) - | (1 << VCPU_EXREG_CR3)); - vcpu->arch.regs_dirty = 0; - - pt_guest_exit(vmx); - - /* - * eager fpu is enabled if PKEY is supported and CR4 is switched - * back on host, so it is safe to read guest PKRU from current - * XSAVE. - */ - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = rdpkru(); - if (vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vmx->host_pkru); - } - - kvm_load_host_xsave_state(vcpu); - - vmx->nested.nested_run_pending = 0; - vmx->idt_vectoring_info = 0; - - vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); - if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) - kvm_machine_check(); - - if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) - return; - - vmx->loaded_vmcs->launched = 1; - vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - - vmx_recover_nmi_blocking(vmx); - vmx_complete_interrupts(vmx); -} - -static struct kvm *vmx_vm_alloc(void) -{ - struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), - GFP_KERNEL_ACCOUNT | __GFP_ZERO, - PAGE_KERNEL); - return &kvm_vmx->kvm; -} - -static void vmx_vm_free(struct kvm *kvm) -{ - kfree(kvm->arch.hyperv.hv_pa_pg); - vfree(to_kvm_vmx(kvm)); -} - -static void vmx_free_vcpu(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (enable_pml) - vmx_destroy_pml_buffer(vmx); - free_vpid(vmx->vpid); - nested_vmx_free_vcpu(vcpu); - free_loaded_vmcs(vmx->loaded_vmcs); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); - kmem_cache_free(kvm_vcpu_cache, vmx); -} - -static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) -{ - int err; - struct vcpu_vmx *vmx; - unsigned long *msr_bitmap; - int i, cpu; - - BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); - - vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!vmx) - return ERR_PTR(-ENOMEM); - - vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.user_fpu) { - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); - err = -ENOMEM; - goto free_partial_vcpu; - } - - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if 
(!vmx->vcpu.arch.guest_fpu) { - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); - err = -ENOMEM; - goto free_user_fpu; - } - - vmx->vpid = allocate_vpid(); - - err = kvm_vcpu_init(&vmx->vcpu, kvm, id); - if (err) - goto free_vcpu; - - err = -ENOMEM; - - /* - * If PML is turned on, failure on enabling PML just results in failure - * of creating the vcpu, therefore we can simplify PML logic (by - * avoiding dealing with cases, such as enabling PML partially on vcpus - * for the guest), etc. - */ - if (enable_pml) { - vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!vmx->pml_pg) - goto uninit_vcpu; - } - - BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS); - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { - u32 index = vmx_msr_index[i]; - u32 data_low, data_high; - int j = vmx->nmsrs; - - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) - continue; - - vmx->guest_msrs[j].index = i; - vmx->guest_msrs[j].data = 0; - switch (index) { - case MSR_IA32_TSX_CTRL: - /* - * No need to pass TSX_CTRL_CPUID_CLEAR through, so - * let's avoid changing CPUID bits under the host - * kernel's feet. - */ - vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - break; - default: - vmx->guest_msrs[j].mask = -1ull; - break; - } - ++vmx->nmsrs; - } - - err = alloc_loaded_vmcs(&vmx->vmcs01); - if (err < 0) - goto free_pml; - - msr_bitmap = vmx->vmcs01.msr_bitmap; - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); - if (kvm_cstate_in_guest(kvm)) { - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); - } - vmx->msr_bitmap_mode = 0; - - vmx->loaded_vmcs = &vmx->vmcs01; - cpu = get_cpu(); - vmx_vcpu_load(&vmx->vcpu, cpu); - vmx->vcpu.cpu = cpu; - init_vmcs(vmx); - vmx_vcpu_put(&vmx->vcpu); - put_cpu(); - if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { - err = alloc_apic_access_page(kvm); - if (err) - goto free_vmcs; - } - - if (enable_ept && !enable_unrestricted_guest) { - err = init_rmode_identity_map(kvm); - if (err) - goto free_vmcs; - } - - if (nested) - nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, - vmx_capability.ept, - kvm_vcpu_apicv_active(&vmx->vcpu)); - else - memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); - - vmx->nested.posted_intr_nv = -1; - vmx->nested.current_vmptr = -1ull; - - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; - - /* - * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR - * or POSTED_INTR_WAKEUP_VECTOR. 
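
For reference while reading the invariant above: the posted-interrupt descriptor is a 64-byte, 64-byte-aligned structure whose control word packs the fields used throughout this file. A rough sketch of its layout (field positions per the VT-d posted-interrupt definition; treat this as illustrative, the authoritative definition lives in KVM's headers):

    struct pi_desc_sketch {
            u32 pir[8];              /* posted interrupt requests, bits 255:0 */
            union {
                    struct {
                            u16 on     : 1,  /* bit 256: outstanding notif.   */
                                sn     : 1,  /* bit 257: suppress notif.      */
                                rsvd_1 : 14;
                            u8  nv;          /* bits 279:272: notif. vector   */
                            u8  rsvd_2;
                            u32 ndst;        /* bits 319:288: notif. dest     */
                    };
                    u64 control;
            };
            u32 rsvd[6];
    } __aligned(64);
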
- */ - vmx->pi_desc.nv = POSTED_INTR_VECTOR; - vmx->pi_desc.sn = 1; - - vmx->ept_pointer = INVALID_PAGE; - - return &vmx->vcpu; - -free_vmcs: - free_loaded_vmcs(vmx->loaded_vmcs); -free_pml: - vmx_destroy_pml_buffer(vmx); -uninit_vcpu: - kvm_vcpu_uninit(&vmx->vcpu); -free_vcpu: - free_vpid(vmx->vpid); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); -free_partial_vcpu: - kmem_cache_free(kvm_vcpu_cache, vmx); - return ERR_PTR(err); -} - -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" - -static int vmx_vm_init(struct kvm *kvm) -{ - spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); - - if (!ple_gap) - kvm->arch.pause_in_guest = true; - - if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { - switch (l1tf_mitigation) { - case L1TF_MITIGATION_OFF: - case L1TF_MITIGATION_FLUSH_NOWARN: - /* 'I explicitly don't care' is set */ - break; - case L1TF_MITIGATION_FLUSH: - case L1TF_MITIGATION_FLUSH_NOSMT: - case L1TF_MITIGATION_FULL: - /* - * Warn upon starting the first VM in a potentially - * insecure environment. - */ - if (sched_smt_active()) - pr_warn_once(L1TF_MSG_SMT); - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) - pr_warn_once(L1TF_MSG_L1D); - break; - case L1TF_MITIGATION_FULL_FORCE: - /* Flush is enforced */ - break; - } - } - return 0; -} - -static int __init vmx_check_processor_compat(void) -{ - struct vmcs_config vmcs_conf; - struct vmx_capability vmx_cap; - - if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) - return -EIO; - if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, - enable_apicv); - if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { - printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", - smp_processor_id()); - return -EIO; - } - return 0; -} - -static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - u8 cache; - u64 ipat = 0; - - /* For VT-d and EPT combination - * 1. MMIO: always map as UC - * 2. EPT with VT-d: - * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. - * b. VT-d with snooping control feature: snooping control feature of - * VT-d engine can guarantee the cache correctness. Just set it - * to WB to keep consistent with host. So the same as item 3. - * 3. 
EPT without VT-d: always map as WB and set IPAT=1 to keep - * consistent with host MTRR - */ - if (is_mmio) { - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { - ipat = VMX_EPT_IPAT_BIT; - cache = MTRR_TYPE_WRBACK; - goto exit; - } - - if (kvm_read_cr0(vcpu) & X86_CR0_CD) { - ipat = VMX_EPT_IPAT_BIT; - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) - cache = MTRR_TYPE_WRBACK; - else - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); - -exit: - return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; -} - -static int vmx_get_lpage_level(void) -{ - if (enable_ept && !cpu_has_vmx_ept_1g_page()) - return PT_DIRECTORY_LEVEL; - else - /* For shadow and EPT supported 1GB page */ - return PT_PDPE_LEVEL; -} - -static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) -{ - /* - * These bits in the secondary execution controls field - * are dynamic, the others are mostly based on the hypervisor - * architecture and the guest's CPUID. Do not touch the - * dynamic bits. - */ - u32 mask = - SECONDARY_EXEC_SHADOW_VMCS | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_DESC; - - u32 new_ctl = vmx->secondary_exec_control; - u32 cur_ctl = secondary_exec_controls_get(vmx); - - secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); -} - -/* - * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits - * (indicating "allowed-1") if they are supported in the guest's CPUID. - */ -static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_cpuid_entry2 *entry; - - vmx->nested.msrs.cr0_fixed1 = 0xffffffff; - vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; - -#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ - if (entry && (entry->_reg & (_cpuid_mask))) \ - vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ -} while (0) - - entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); - cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); - cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); - cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); - cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); - cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); - cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); - cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); - cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); - cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); - cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); - cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); - cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); - - entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); - cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); - cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); - cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); - cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); - cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); - -#undef cr4_fixed1_update -} - -static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (kvm_mpx_supported()) { - bool mpx_enabled = 
guest_cpuid_has(vcpu, X86_FEATURE_MPX); - - if (mpx_enabled) { - vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; - vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - } else { - vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; - vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; - } - } -} - -static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_cpuid_entry2 *best = NULL; - int i; - - for (i = 0; i < PT_CPUID_LEAVES; i++) { - best = kvm_find_cpuid_entry(vcpu, 0x14, i); - if (!best) - return; - vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax; - vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx; - vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx; - vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx; - } - - /* Get the number of configurable Address Ranges for filtering */ - vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges); - - /* Initialize and clear the no dependency bits */ - vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS | - RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC); - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise - * will inject an #GP - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN; - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and - * PSBFreq can be set - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC | - RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ); - - /* - * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and - * MTCFreq can be set - */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN | - RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE); - - /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite)) - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW | - RTIT_CTL_PTW_EN); - - /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN; - - /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA; - - /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */ - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys)) - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN; - - /* unmask address range configure area */ - for (i = 0; i < vmx->pt_desc.addr_range; i++) - vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); -} - -static void vmx_cpuid_update(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). 
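
The unmask loop above follows the RTIT_CTL layout, where address filter n owns the four bits 32 + 4n .. 35 + 4n (ADDRn_CFG). A self-contained sketch of the same computation (function name ours):

    static u64 pt_addr_range_bitmask(unsigned int nr_ranges)
    {
            u64 bitmask = ~0ULL;    /* start with every bit masked off */
            unsigned int i;

            /* e.g. nr_ranges == 2 clears bits 35:32 and 39:36 */
            for (i = 0; i < nr_ranges; i++)
                    bitmask &= ~(0xfULL << (32 + i * 4));   /* open ADDRn_CFG */
            return bitmask;
    }
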
*/ - vcpu->arch.xsaves_enabled = false; - - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - vmcs_set_secondary_exec_control(vmx); - } - - if (nested_vmx_allowed(vcpu)) - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - else - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); - - if (nested_vmx_allowed(vcpu)) { - nested_vmx_cr_fixed1_bits_update(vcpu); - nested_vmx_entry_exit_ctls_update(vcpu); - } - - if (boot_cpu_has(X86_FEATURE_INTEL_PT) && - guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) - update_intel_pt_cfg(vcpu); - - if (boot_cpu_has(X86_FEATURE_RTM)) { - struct shared_msr_entry *msr; - msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL); - if (msr) { - bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); - vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE); - } - } -} - -static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) -{ - if (func == 1 && nested) - entry->ecx |= bit(X86_FEATURE_VMX); -} - -static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) -{ - to_vmx(vcpu)->req_immediate_exit = true; -} - -static int vmx_check_intercept(struct kvm_vcpu *vcpu, - struct x86_instruction_info *info, - enum x86_intercept_stage stage) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; - - /* - * RDPID causes #UD if disabled through secondary execution controls. - * Because it is marked as EmulateOnUD, we need to intercept it here. - */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } - - /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; -} - -#ifdef CONFIG_X86_64 -/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ -static inline int u64_shl_div_u64(u64 a, unsigned int shift, - u64 divisor, u64 *result) -{ - u64 low = a << shift, high = a >> (64 - shift); - - /* To avoid the overflow on divq */ - if (high >= divisor) - return 1; - - /* Low hold the result, high hold rem which is discarded */ - asm("divq %2\n\t" : "=a" (low), "=d" (high) : - "rm" (divisor), "0" (low), "1" (high)); - *result = low; - - return 0; -} - -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, - bool *expired) -{ - struct vcpu_vmx *vmx; - u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; - struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; - - if (kvm_mwait_in_guest(vcpu->kvm) || - kvm_can_post_timer_interrupt(vcpu)) - return -EOPNOTSUPP; - - vmx = to_vmx(vcpu); - tscl = rdtsc(); - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); - delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; - lapic_timer_advance_cycles = nsec_to_cycles(vcpu, - ktimer->timer_advance_ns); - - if (delta_tsc > lapic_timer_advance_cycles) - delta_tsc -= lapic_timer_advance_cycles; - else - delta_tsc = 0; - - /* Convert to host delta tsc if tsc scaling is enabled */ - if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && - delta_tsc && u64_shl_div_u64(delta_tsc, - kvm_tsc_scaling_ratio_frac_bits, - vcpu->arch.tsc_scaling_ratio, &delta_tsc)) - return -ERANGE; - - /* - * If the delta tsc can't fit in the 32 bit after the multi shift, - * we can't use the preemption timer. 
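
The divq wrapper above has a straightforward portable equivalent; a sketch using the compiler's unsigned __int128 (illustrative only, the asm version avoids pulling in a 128-bit division helper):

    static inline int u64_shl_div_u64_sketch(u64 a, unsigned int shift,
                                             u64 divisor, u64 *result)
    {
            unsigned __int128 dividend = (unsigned __int128)a << shift;

            /* Same overflow rule as divq: the quotient must fit in 64 bits,
             * which holds exactly when the high half is below the divisor. */
            if ((u64)(dividend >> 64) >= divisor)
                    return 1;

            *result = (u64)(dividend / divisor);    /* remainder discarded */
            return 0;
    }
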
- * It's possible that it fits on later vmentries, but checking - * on every vmentry is costly so we just use an hrtimer. - */ - if (delta_tsc >> (cpu_preemption_timer_multi + 32)) - return -ERANGE; - - vmx->hv_deadline_tsc = tscl + delta_tsc; - *expired = !delta_tsc; - return 0; -} - -static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) -{ - to_vmx(vcpu)->hv_deadline_tsc = -1; -} -#endif - -static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ - if (!kvm_pause_in_guest(vcpu->kvm)) - shrink_ple_window(vcpu); -} - -static void vmx_slot_enable_log_dirty(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_mmu_slot_leaf_clear_dirty(kvm, slot); - kvm_mmu_slot_largepage_remove_write_access(kvm, slot); -} - -static void vmx_slot_disable_log_dirty(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - kvm_mmu_slot_set_dirty(kvm, slot); -} - -static void vmx_flush_log_dirty(struct kvm *kvm) -{ - kvm_flush_pml_buffers(kvm); -} - -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12; - struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; - - if (is_guest_mode(vcpu)) { - WARN_ON_ONCE(vmx->nested.pml_full); - - /* - * Check if PML is enabled for the nested guest. - * Whether eptp bit 6 is set is already checked - * as part of A/D emulation. - */ - vmcs12 = get_vmcs12(vcpu); - if (!nested_cpu_has_pml(vmcs12)) - return 0; - - if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { - vmx->nested.pml_full = true; - return 1; - } - - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; - dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; - - if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, - offset_in_page(dst), sizeof(gpa))) - return 0; - - vmcs12->guest_pml_index--; - } - - return 0; -} - -static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *memslot, - gfn_t offset, unsigned long mask) -{ - kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); -} - -static void __pi_post_block(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - - do { - old.control = new.control = pi_desc->control; - WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, - "Wakeup handler not enabled while the VCPU is blocked\n"); - - dest = cpu_physical_id(vcpu->cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - - if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - vcpu->pre_pcpu = -1; - } -} - -/* - * This routine does the following things for vCPU which is going - * to be blocked if VT-d PI is enabled. - * - Store the vCPU to the wakeup list, so when interrupts happen - * we can find the right vCPU to wake up. - * - Change the Posted-interrupt descriptor as below: - * 'NDST' <-- vcpu->pre_pcpu - * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR - * - If 'ON' is set during this process, which means at least one - * interrupt is posted for this vCPU, we cannot block it, in - * this case, return 1, otherwise, return 0. 
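
The 'NDST' update described above is the one mode-dependent step: in xAPIC mode the destination APIC ID sits in bits 15:8 of the field, while x2APIC mode uses the full 32-bit ID. A sketch mirroring the assignments in the functions below (helper name ours):

    static u32 pi_ndst(unsigned int phys_apic_id)
    {
            return x2apic_enabled() ? phys_apic_id
                                    : (phys_apic_id << 8) & 0xFF00;
    }
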
- * - */ -static int pi_pre_block(struct kvm_vcpu *vcpu) -{ - unsigned int dest; - struct pi_desc old, new; - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) - return 0; - - WARN_ON(irqs_disabled()); - local_irq_disable(); - if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { - vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - } - - do { - old.control = new.control = pi_desc->control; - - WARN((pi_desc->sn == 1), - "Warning: SN field of posted-interrupts " - "is set before blocking\n"); - - /* - * Since vCPU can be preempted during this process, - * vcpu->cpu could be different with pre_pcpu, we - * need to set pre_pcpu as the destination of wakeup - * notification event, then we can find the right vCPU - * to wakeup in wakeup handler if interrupts happen - * when the vCPU is in blocked state. - */ - dest = cpu_physical_id(vcpu->pre_pcpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* set 'NV' to 'wakeup vector' */ - new.nv = POSTED_INTR_WAKEUP_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - - /* We should not block the vCPU if an interrupt is posted for it. */ - if (pi_test_on(pi_desc) == 1) - __pi_post_block(vcpu); - - local_irq_enable(); - return (vcpu->pre_pcpu == -1); -} - -static int vmx_pre_block(struct kvm_vcpu *vcpu) -{ - if (pi_pre_block(vcpu)) - return 1; - - if (kvm_lapic_hv_timer_in_use(vcpu)) - kvm_lapic_switch_to_sw_timer(vcpu); - - return 0; -} - -static void pi_post_block(struct kvm_vcpu *vcpu) -{ - if (vcpu->pre_pcpu == -1) - return; - - WARN_ON(irqs_disabled()); - local_irq_disable(); - __pi_post_block(vcpu); - local_irq_enable(); -} - -static void vmx_post_block(struct kvm_vcpu *vcpu) -{ - if (kvm_x86_ops->set_hv_timer) - kvm_lapic_switch_to_hv_timer(vcpu); - - pi_post_block(vcpu); -} - -/* - * vmx_update_pi_irte - set IRTE for Posted-Interrupts - * - * @kvm: kvm - * @host_irq: host irq of the interrupt - * @guest_irq: gsi of the interrupt - * @set: set or unset PI - * returns 0 on success, < 0 on failure - */ -static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) -{ - struct kvm_kernel_irq_routing_entry *e; - struct kvm_irq_routing_table *irq_rt; - struct kvm_lapic_irq irq; - struct kvm_vcpu *vcpu; - struct vcpu_data vcpu_info; - int idx, ret = 0; - - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(kvm->vcpus[0])) - return 0; - - idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (guest_irq >= irq_rt->nr_rt_entries || - hlist_empty(&irq_rt->map[guest_irq])) { - pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", - guest_irq, irq_rt->nr_rt_entries); - goto out; - } - - hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { - if (e->type != KVM_IRQ_ROUTING_MSI) - continue; - /* - * VT-d PI cannot support posting multicast/broadcast - * interrupts to a vCPU, we still use interrupt remapping - * for these kind of interrupts. - * - * For lowest-priority interrupts, we only support - * those with single CPU as the destination, e.g. 
user - * configures the interrupts via /proc/irq or uses - * irqbalance to make the interrupts single-CPU. - * - * We will support full lowest-priority interrupt later. - * - * In addition, we can only inject generic interrupts using - * the PI mechanism, refuse to route others through it. - */ - - kvm_set_msi_irq(kvm, e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. - */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - - continue; - } - - vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); - vcpu_info.vector = irq.vector; - - trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, - vcpu_info.vector, vcpu_info.pi_desc_addr, set); - - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); - - if (ret < 0) { - printk(KERN_INFO "%s: failed to update PI IRTE\n", - __func__); - goto out; - } - } - - ret = 0; -out: - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - -static void vmx_setup_mce(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.mcg_cap & MCG_LMCE_P) - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_LMCE; - else - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_LMCE; -} - -static int vmx_smi_allowed(struct kvm_vcpu *vcpu) -{ - /* we need a nested vmexit to enter SMM, postpone if run is pending */ - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - return 1; -} - -static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - vmx->nested.smm.guest_mode = is_guest_mode(vcpu); - if (vmx->nested.smm.guest_mode) - nested_vmx_vmexit(vcpu, -1, 0, 0); - - vmx->nested.smm.vmxon = vmx->nested.vmxon; - vmx->nested.vmxon = false; - vmx_clear_hlt(vcpu); - return 0; -} - -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - int ret; - - if (vmx->nested.smm.vmxon) { - vmx->nested.vmxon = true; - vmx->nested.smm.vmxon = false; - } - - if (vmx->nested.smm.guest_mode) { - ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) - return ret; - - vmx->nested.smm.guest_mode = false; - } - return 0; -} - -static int enable_smi_window(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) -{ - return false; -} - -static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) -{ - return to_vmx(vcpu)->nested.vmxon; -} - -static __init int hardware_setup(void) -{ - unsigned long host_bndcfgs; - struct desc_ptr dt; - int r, i; - - rdmsrl_safe(MSR_EFER, &host_efer); - - store_idt(&dt); - host_idt_base = dt.address; - - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) - kvm_define_shared_msr(i, vmx_msr_index[i]); - - if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) - return -EIO; - - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - - if (boot_cpu_has(X86_FEATURE_MPX)) { - rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); - WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); - } - - if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || - !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) - enable_vpid = 0; - - if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels() || - !cpu_has_vmx_ept_mt_wb() || - !cpu_has_vmx_invept_global()) - 
enable_ept = 0; - - if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) - enable_ept_ad_bits = 0; - - if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) - enable_unrestricted_guest = 0; - - if (!cpu_has_vmx_flexpriority()) - flexpriority_enabled = 0; - - if (!cpu_has_virtual_nmis()) - enable_vnmi = 0; - - /* - * set_apic_access_page_addr() is used to reload apic access - * page upon invalidation. No need to do anything if not - * using the APIC_ACCESS_ADDR VMCS field. - */ - if (!flexpriority_enabled) - kvm_x86_ops->set_apic_access_page_addr = NULL; - - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; - - if (enable_ept && !cpu_has_vmx_ept_2m_page()) - kvm_disable_largepages(); - -#if IS_ENABLED(CONFIG_HYPERV) - if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH - && enable_ept) { - kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb; - kvm_x86_ops->tlb_remote_flush_with_range = - hv_remote_flush_tlb_with_range; - } -#endif - - if (!cpu_has_vmx_ple()) { - ple_gap = 0; - ple_window = 0; - ple_window_grow = 0; - ple_window_max = 0; - ple_window_shrink = 0; - } - - if (!cpu_has_vmx_apicv()) { - enable_apicv = 0; - kvm_x86_ops->sync_pir_to_irr = NULL; - } - - if (cpu_has_vmx_tsc_scaling()) { - kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; - kvm_tsc_scaling_ratio_frac_bits = 48; - } - - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - - if (enable_ept) - vmx_enable_tdp(); - else - kvm_disable_tdp(); - - /* - * Only enable PML when hardware supports PML feature, and both EPT - * and EPT A/D bit features are enabled -- PML depends on them to work. - */ - if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) - enable_pml = 0; - - if (!enable_pml) { - kvm_x86_ops->slot_enable_log_dirty = NULL; - kvm_x86_ops->slot_disable_log_dirty = NULL; - kvm_x86_ops->flush_log_dirty = NULL; - kvm_x86_ops->enable_log_dirty_pt_masked = NULL; - } - - if (!cpu_has_vmx_preemption_timer()) - enable_preemption_timer = false; - - if (enable_preemption_timer) { - u64 use_timer_freq = 5000ULL * 1000 * 1000; - u64 vmx_msr; - - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); - cpu_preemption_timer_multi = - vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; - - if (tsc_khz) - use_timer_freq = (u64)tsc_khz * 1000; - use_timer_freq >>= cpu_preemption_timer_multi; - - /* - * KVM "disables" the preemption timer by setting it to its max - * value. Don't use the timer if it might cause spurious exits - * at a rate faster than 0.1 Hz (of uninterrupted guest time). 
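
To make the 0.1 Hz bound above concrete: the preemption timer counts down a 32-bit value, so the longest uninterrupted stretch it can cover is (2^32 - 1) / use_timer_freq seconds, and requiring at least 10 s of coverage is exactly the use_timer_freq <= 0xffffffffu / 10 check that follows. A worked example (the numbers are hypothetical):

    /* A 2 GHz TSC with a preemption-timer rate of 1/32 ticks at 62.5 MHz. */
    u64 use_timer_freq = 2000000000ULL >> 5;            /* 62500000       */
    bool keep = use_timer_freq <= 0xffffffffu / 10;     /* ~429 MHz: true */
    /* Full countdown: (2^32 - 1) / 62.5e6 ~= 68.7 s of guest time.       */
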
- */ - if (use_timer_freq > 0xffffffffu / 10) - enable_preemption_timer = false; - } - - if (!enable_preemption_timer) { - kvm_x86_ops->set_hv_timer = NULL; - kvm_x86_ops->cancel_hv_timer = NULL; - kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; - } - - kvm_set_posted_intr_wakeup_handler(wakeup_handler); - - kvm_mce_cap_supported |= MCG_LMCE_P; - - if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) - return -EINVAL; - if (!enable_ept || !cpu_has_vmx_intel_pt()) - pt_mode = PT_MODE_SYSTEM; - - if (nested) { - nested_vmx_setup_ctls_msrs(&vmcs_config.nested, - vmx_capability.ept, enable_apicv); - - r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); - if (r) - return r; - } - - r = alloc_kvm_area(); - if (r) - nested_vmx_hardware_unsetup(); - return r; -} - -static __exit void hardware_unsetup(void) -{ - if (nested) - nested_vmx_hardware_unsetup(); - - free_kvm_area(); -} - -static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { - .cpu_has_kvm_support = cpu_has_kvm_support, - .disabled_by_bios = vmx_disabled_by_bios, - .hardware_setup = hardware_setup, - .hardware_unsetup = hardware_unsetup, - .check_processor_compatibility = vmx_check_processor_compat, - .hardware_enable = hardware_enable, - .hardware_disable = hardware_disable, - .cpu_has_accelerated_tpr = report_flexpriority, - .has_emulated_msr = vmx_has_emulated_msr, - - .vm_init = vmx_vm_init, - .vm_alloc = vmx_vm_alloc, - .vm_free = vmx_vm_free, - - .vcpu_create = vmx_create_vcpu, - .vcpu_free = vmx_free_vcpu, - .vcpu_reset = vmx_vcpu_reset, - - .prepare_guest_switch = vmx_prepare_switch_to_guest, - .vcpu_load = vmx_vcpu_load, - .vcpu_put = vmx_vcpu_put, - - .update_bp_intercept = update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, - .get_msr = vmx_get_msr, - .set_msr = vmx_set_msr, - .get_segment_base = vmx_get_segment_base, - .get_segment = vmx_get_segment, - .set_segment = vmx_set_segment, - .get_cpl = vmx_get_cpl, - .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, - .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, - .set_cr0 = vmx_set_cr0, - .set_cr3 = vmx_set_cr3, - .set_cr4 = vmx_set_cr4, - .set_efer = vmx_set_efer, - .get_idt = vmx_get_idt, - .set_idt = vmx_set_idt, - .get_gdt = vmx_get_gdt, - .set_gdt = vmx_set_gdt, - .get_dr6 = vmx_get_dr6, - .set_dr6 = vmx_set_dr6, - .set_dr7 = vmx_set_dr7, - .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, - .cache_reg = vmx_cache_reg, - .get_rflags = vmx_get_rflags, - .set_rflags = vmx_set_rflags, - - .tlb_flush = vmx_flush_tlb, - .tlb_flush_gva = vmx_flush_tlb_gva, - - .run = vmx_vcpu_run, - .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = skip_emulated_instruction, - .set_interrupt_shadow = vmx_set_interrupt_shadow, - .get_interrupt_shadow = vmx_get_interrupt_shadow, - .patch_hypercall = vmx_patch_hypercall, - .set_irq = vmx_inject_irq, - .set_nmi = vmx_inject_nmi, - .queue_exception = vmx_queue_exception, - .cancel_injection = vmx_cancel_injection, - .interrupt_allowed = vmx_interrupt_allowed, - .nmi_allowed = vmx_nmi_allowed, - .get_nmi_mask = vmx_get_nmi_mask, - .set_nmi_mask = vmx_set_nmi_mask, - .enable_nmi_window = enable_nmi_window, - .enable_irq_window = enable_irq_window, - .update_cr8_intercept = update_cr8_intercept, - .set_virtual_apic_mode = vmx_set_virtual_apic_mode, - .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .get_enable_apicv = vmx_get_enable_apicv, - .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, - .load_eoi_exitmap = 
vmx_load_eoi_exitmap, - .apicv_post_state_restore = vmx_apicv_post_state_restore, - .hwapic_irr_update = vmx_hwapic_irr_update, - .hwapic_isr_update = vmx_hwapic_isr_update, - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, - .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_posted_interrupt = vmx_deliver_posted_interrupt, - .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, - - .set_tss_addr = vmx_set_tss_addr, - .set_identity_map_addr = vmx_set_identity_map_addr, - .get_tdp_level = get_ept_level, - .get_mt_mask = vmx_get_mt_mask, - - .get_exit_info = vmx_get_exit_info, - - .get_lpage_level = vmx_get_lpage_level, - - .cpuid_update = vmx_cpuid_update, - - .rdtscp_supported = vmx_rdtscp_supported, - .invpcid_supported = vmx_invpcid_supported, - - .set_supported_cpuid = vmx_set_supported_cpuid, - - .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - - .read_l1_tsc_offset = vmx_read_l1_tsc_offset, - .write_l1_tsc_offset = vmx_write_l1_tsc_offset, - - .set_tdp_cr3 = vmx_set_cr3, - - .check_intercept = vmx_check_intercept, - .handle_exit_irqoff = vmx_handle_exit_irqoff, - .mpx_supported = vmx_mpx_supported, - .xsaves_supported = vmx_xsaves_supported, - .umip_emulated = vmx_umip_emulated, - .pt_supported = vmx_pt_supported, - - .request_immediate_exit = vmx_request_immediate_exit, - - .sched_in = vmx_sched_in, - - .slot_enable_log_dirty = vmx_slot_enable_log_dirty, - .slot_disable_log_dirty = vmx_slot_disable_log_dirty, - .flush_log_dirty = vmx_flush_log_dirty, - .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, - .write_log_dirty = vmx_write_pml_buffer, - - .pre_block = vmx_pre_block, - .post_block = vmx_post_block, - - .pmu_ops = &intel_pmu_ops, - - .update_pi_irte = vmx_update_pi_irte, - -#ifdef CONFIG_X86_64 - .set_hv_timer = vmx_set_hv_timer, - .cancel_hv_timer = vmx_cancel_hv_timer, -#endif - - .setup_mce = vmx_setup_mce, - - .smi_allowed = vmx_smi_allowed, - .pre_enter_smm = vmx_pre_enter_smm, - .pre_leave_smm = vmx_pre_leave_smm, - .enable_smi_window = enable_smi_window, - - .check_nested_events = NULL, - .get_nested_state = NULL, - .set_nested_state = NULL, - .get_vmcs12_pages = NULL, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, - .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, - .apic_init_signal_blocked = vmx_apic_init_signal_blocked, -}; - -static void vmx_cleanup_l1d_flush(void) -{ - if (vmx_l1d_flush_pages) { - free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); - vmx_l1d_flush_pages = NULL; - } - /* Restore state so sysfs ignores VMX */ - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; -} - -static void vmx_exit(void) -{ -#ifdef CONFIG_KEXEC_CORE - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); - synchronize_rcu(); -#endif - - kvm_exit(); - -#if IS_ENABLED(CONFIG_HYPERV) - if (static_branch_unlikely(&enable_evmcs)) { - int cpu; - struct hv_vp_assist_page *vp_ap; - /* - * Reset everything to support using non-enlightened VMCS - * access later (e.g. 
when we reload the module with - * enlightened_vmcs=0) - */ - for_each_online_cpu(cpu) { - vp_ap = hv_get_vp_assist_page(cpu); - - if (!vp_ap) - continue; - - vp_ap->nested_control.features.directhypercall = 0; - vp_ap->current_nested_vmcs = 0; - vp_ap->enlighten_vmentry = 0; - } - - static_branch_disable(&enable_evmcs); - } -#endif - vmx_cleanup_l1d_flush(); -} -module_exit(vmx_exit); - -static int __init vmx_init(void) -{ - int r; - -#if IS_ENABLED(CONFIG_HYPERV) - /* - * Enlightened VMCS usage should be recommended and the host needs - * to support eVMCS v1 or above. We can also disable eVMCS support - * with module parameter. - */ - if (enlightened_vmcs && - ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && - (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= - KVM_EVMCS_VERSION) { - int cpu; - - /* Check that we have assist pages on all online CPUs */ - for_each_online_cpu(cpu) { - if (!hv_get_vp_assist_page(cpu)) { - enlightened_vmcs = false; - break; - } - } - - if (enlightened_vmcs) { - pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); - static_branch_enable(&enable_evmcs); - } - - if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_direct_tlbflush - = hv_enable_direct_tlbflush; - - } else { - enlightened_vmcs = false; - } -#endif - - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); - if (r) - return r; - - /* - * Must be called after kvm_init() so enable_ept is properly set - * up. Hand the parameter mitigation value in which was stored in - * the pre module init parser. If no parameter was given, it will - * contain 'auto' which will be turned into the default 'cond' - * mitigation mode. - */ - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } - -#ifdef CONFIG_KEXEC_CORE - rcu_assign_pointer(crash_vmclear_loaded_vmcss, - crash_vmclear_local_loaded_vmcss); -#endif - vmx_check_vmcs12_offsets(); - - return 0; -} -module_init(vmx_init); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d6851636edab323d49791e67a11a7c8d84f273d8..5f44827e496207d8a778b91f442f21bb5f5a907b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6638,7 +6638,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); } -static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) +static int inject_pending_event(struct kvm_vcpu *vcpu) { int r; @@ -6665,7 +6665,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + r = kvm_x86_ops->check_nested_events(vcpu); if (r != 0) return r; } @@ -6706,7 +6706,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) * KVM_REQ_EVENT only on certain events and not unconditionally? */ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + r = kvm_x86_ops->check_nested_events(vcpu); if (r != 0) return r; } @@ -7152,7 +7152,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } - if (inject_pending_event(vcpu, req_int_win) != 0) + if (inject_pending_event(vcpu) != 0) req_immediate_exit = true; else { /* Enable NMI/IRQ window open exits if needed. 
@@ -7360,7 +7360,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) - kvm_x86_ops->check_nested_events(vcpu, false); + kvm_x86_ops->check_nested_events(vcpu); return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted); @@ -8584,6 +8584,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, { int i; + /* + * Clear out the previous array pointers for the KVM_MR_MOVE case. The + * old arrays will be freed by __kvm_set_memory_region() if installing + * the new memslot is successful. + */ + memset(&slot->arch, 0, sizeof(slot->arch)); + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { struct kvm_lpage_info *linfo; unsigned long ugfn; @@ -8657,6 +8664,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { + if (change == KVM_MR_MOVE) + return kvm_arch_create_memslot(kvm, memslot, + mem->memory_size >> PAGE_SHIFT); + return 0; } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 0a0e9112f2842e5b40e4e2284b2c061c4d98370a..5cb9f009f2be3f32c2bd8316d0e5f9805f6b4ea5 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -909,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 9fe656c42aa5b16560e139cebba247ca52756c80..2b0450a3c8ab2c05b54596cb9c5fd2197422af07 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -170,6 +170,7 @@ bool ex_has_fault_handler(unsigned long ip) return handler == ex_handler_fault; } +__nocfi int fixup_exception(struct pt_regs *regs, int trapnr) { const struct exception_table_entry *e; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 03370c29510533c55a4211690c9594bd01b53c31..d3df6fa5dc639309fd5b66148652ea02fb63ee74 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -272,7 +272,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -299,6 +299,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -401,11 +411,23 @@ static void dump_pagetable(unsigned long address) #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. 
+ */ +} + /* * 64-bit: * diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 624edfbff02de4d794dea06041e4d3a1cb669cbb..4937d6f7c256bffdfb1f320ce61a8564f7c1ed04 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1184,7 +1184,7 @@ int kernel_set_to_readonly; void set_kernel_text_rw(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1203,7 +1203,7 @@ void set_kernel_text_rw(void) void set_kernel_text_ro(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1222,7 +1222,7 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long rodata_start = PFN_ALIGN(__start_rodata); unsigned long end = (unsigned long) &__end_rodata_hpage_align; - unsigned long text_end = PFN_ALIGN(&__stop___ex_table); + unsigned long text_end = PFN_ALIGN(&_etext); unsigned long rodata_end = PFN_ALIGN(&__end_rodata); unsigned long all_end; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 835620ab435fdb55101c783c26461c6a4d47b2c0..eaee1a7ed0b5061a60aa2c2fb27a661682ba7208 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2077,19 +2077,13 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 33c6ee9aebbd0f802deee90bc38039fdecb4379b..639f56dc626a151040d1dc66780d17bb5d749cf9 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -367,6 +367,15 @@ static void __init pti_clone_entry_text(void) pti_clone_pmds((unsigned long) __entry_text_start, (unsigned long) __irqentry_text_end, _PAGE_RW | _PAGE_GLOBAL); + + /* + * If CFI is enabled, also map jump tables, so the entry code can + * make indirect calls. + */ + if (IS_ENABLED(CONFIG_CFI_CLANG)) + pti_clone_pmds((unsigned long) __cfi_jt_start, + (unsigned long) __cfi_jt_end, + _PAGE_RW | _PAGE_GLOBAL); } /* diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index cdb386fa710134712bf01147bc60c94fe87bdd0b..0415c0cd4a1915f396e7e3b3769fc93fe8e372ed 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -153,6 +153,19 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_AX)); } +/* + * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 + * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte + * of encoding. al,cl,dl,bl have simpler encoding. 
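+ * Editor's illustration (added lines, not in the original patch): 'mov byte
+ * ptr [rax], al' encodes as 88 00, while 'mov byte ptr [rax], dil' needs a
+ * REX prefix (40 88 38); without REX, register code 7 in the ModRM byte
+ * would select BH rather than DIL.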
+ */ +static bool is_ereg_8l(u32 reg) +{ + return is_ereg(reg) || + (1 << reg) & (BIT(BPF_REG_1) | + BIT(BPF_REG_2) | + BIT(BPF_REG_FP)); +} + /* add modifiers if 'reg' maps to x64 registers r8..r15 */ static u8 add_1mod(u8 byte, u32 reg) { @@ -770,9 +783,8 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: /* emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + if (is_ereg(dst_reg) || is_ereg_8l(src_reg)) + /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 335a62e74a2e9ad8d1751bd1aef1bc556d2ce172..e7f19dec16b9704927681dab9c74715f32b75faa 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -480,7 +480,6 @@ void __init efi_init(void) efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -505,14 +504,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -929,16 +930,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -952,8 +951,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -961,12 +959,11 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); @@ -986,9 +983,9 @@ static void __init __efi_enter_virtual_mode(void) } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } /* @@ -1015,6 +1012,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 
ae369c2bbc3ebbf508e7c64bd32a8a9e77950848..0ebb7f94fd518c3cff6f94ea2494ade4a098ab7b 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -390,11 +390,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 220e97841e494c41e21b242a053b9ffcefeb2b4a..4334dfb24ed66e8fe1e9afa77e501c0496b7cad9 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -46,6 +46,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "^(xen_irq_disable_direct_reloc$|" "xen_save_fl_direct_reloc$|" "VDSO|" + "__typeid__|" "__crc_)", /* @@ -781,6 +782,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, add_reloc(&relocs32neg, offset); break; + case R_X86_64_8: + if (!shn_abs || !is_reloc(S_ABS, symname)) + die("Non-whitelisted %s relocation: %s\n", + rel_type(r_type), symname); + break; + case R_X86_64_32: case R_X86_64_32S: case R_X86_64_64: diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f79a0cdc6b4e7ba23100a40447d981a89c929a02..1f8175bf2a5e300df11571dc585467e6ed1ba783 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -909,14 +909,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; +#ifdef CONFIG_X86_64 + unsigned int which; + u64 base; +#endif ret = 0; switch (msr) { #ifdef CONFIG_X86_64 - unsigned which; - u64 base; - case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index afbbe5750a1f85b635e7cd9a903314c0729c26d4..7d7aee024ece28b8fc121f89ddad7f06a6d41887 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -499,12 +499,13 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, */ entity = &bfqg->entity; for_each_entity(entity) { - bfqg = container_of(entity, struct bfq_group, entity); - if (bfqg != bfqd->root_group) { - parent = bfqg_parent(bfqg); + struct bfq_group *curr_bfqg = container_of(entity, + struct bfq_group, entity); + if (curr_bfqg != bfqd->root_group) { + parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; - bfq_group_set_parent(bfqg, parent); + bfq_group_set_parent(curr_bfqg, parent); } } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index f23311e4b201fe60a6cf460a5c194cf43fd8f65b..e56a480b6f929b296d25609b54920268ad38db71 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -87,6 +87,7 @@ static void ioc_destroy_icq(struct io_cq *icq) * making it impossible to determine icq_cache. Record it in @icq. 
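+ * Editor's note (added comment, not in the original patch): the
+ * ICQ_DESTROYED flag set below lets __ioc_clear_queue() recognize an icq
+ * that another path has already torn down and avoid destroying it twice.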
*/ icq->__rcu_icq_cache = et->icq_cache; + icq->flags |= ICQ_DESTROYED; call_rcu(&icq->__rcu_head, icq_free_icq_rcu); } @@ -230,15 +231,21 @@ static void __ioc_clear_queue(struct list_head *icq_list) { unsigned long flags; + rcu_read_lock(); while (!list_empty(icq_list)) { struct io_cq *icq = list_entry(icq_list->next, struct io_cq, q_node); struct io_context *ioc = icq->ioc; spin_lock_irqsave(&ioc->lock, flags); + if (icq->flags & ICQ_DESTROYED) { + spin_unlock_irqrestore(&ioc->lock, flags); + continue; + } ioc_destroy_icq(icq); spin_unlock_irqrestore(&ioc->lock, flags); } + rcu_read_unlock(); } /** diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 3d2ab65d2dd15a012d3ff1efb71ad5b5aca6ddd5..e4b3eeaffc821aa61a03c1a9fad2782ea7ce6eb9 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -334,6 +334,13 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, struct blk_mq_hw_ctx *hctx; int i; + /* + * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and + * queue_hw_ctx after freeze the queue, so we use q_usage_counter + * to avoid race with it. + */ + if (!percpu_ref_tryget(&q->q_usage_counter)) + return; queue_for_each_hw_ctx(q, hctx, i) { struct blk_mq_tags *tags = hctx->tags; @@ -349,7 +356,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); } - + blk_queue_exit(q); } static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, diff --git a/block/blk-mq.c b/block/blk-mq.c index eac4448047366bb60db76788833f6e5b977dd467..9d53f476c5179713a41b5e9e672d9152626519cd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2748,6 +2748,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); + /* + * Sync with blk_mq_queue_tag_busy_iter. 
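+ * Editor's gloss (added comment, not in the original patch): q_usage_counter
+ * is a percpu_ref whose tryget side runs under RCU, so waiting out a grace
+ * period here guarantees that any iterator racing with the update has either
+ * taken its reference or observed the freeze and bailed out.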
+ */ + synchronize_rcu(); } void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) diff --git a/block/blk-settings.c b/block/blk-settings.c index 6c2faaa38cc1e47f41cb81f2e223df82f6db4c4b..e0a744921ed3d349a5a43711364781b2f4d702e6 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -717,6 +717,9 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", top, bottom); } + + t->backing_dev_info->io_pages = + t->limits.max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(disk_stack_limits); diff --git a/build.config.common b/build.config.common index d4754486cd821612b3bf4ee18de1708eba2af2fe..6176b14b9b88c4043d4be7901b282ca4f6fd42a4 100644 --- a/build.config.common +++ b/build.config.common @@ -3,7 +3,7 @@ KERNEL_DIR=common CC=clang LD=ld.lld -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r370808/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r377782c/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' diff --git a/drivers/Kconfig b/drivers/Kconfig index 83e7ecb8600b22bc6a31212b52f09aa07fff906d..43377a3acb970b42badd910de16be6035869d945 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -201,6 +201,8 @@ source "drivers/thunderbolt/Kconfig" source "drivers/android/Kconfig" +source "drivers/gpu/trace/Kconfig" + source "drivers/nvdimm/Kconfig" source "drivers/dax/Kconfig" diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 95600309ce420a21d32f50f6b8095a6fe335fb78..0bd1899a287f37b88595ac3dfc8e8f42ea5d845d 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -58,12 +58,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif +static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status; - if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL; status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -91,6 +93,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog); +/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries; @@ -129,12 +139,11 @@ void __init acpi_watchdog_init(void) gas = &entries[i].register_region; res.start = gas->address; + res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 7bcf5f5ea0297ffcec2cbd39b9171e4ba93afd50..8df4a49a99a6b1031b47f8767ab31f0c59b125da 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -273,7 +273,7 @@ acpi_ds_create_buffer_field(union acpi_parse_object *op, * FUNCTION: acpi_ds_get_field_names * * PARAMETERS: info - create_field info structure - * ` walk_state - Current method state + * walk_state - Current method state * arg - First parser arg for the field name list * * RETURN: Status diff --git 
a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index eaa859a89702a2290d62e87ecf5ef9131afe708a..1d82e1419397e469146ed101f0d044d6f368d6d6 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -444,6 +444,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op, walk_state)); + /* + * Disassembler: handle create field operators here. + * + * create_buffer_field is a deferred op that is typically processed in load + * pass 2. However, disassembly of control method contents walk the parse + * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed + * in a later walk. This is a problem when there is a control method that + * has the same name as the AML_CREATE object. In this case, any use of the + * name segment will be detected as a method call rather than a reference + * to a buffer field. + * + * This earlier creation during disassembly solves this issue by inserting + * the named object in the ACPI namespace so that references to this name + * would be a name string rather than a method call. + */ + if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) && + (walk_state->op_info->flags & AML_CREATE)) { + status = acpi_ds_create_buffer_field(op, walk_state); + return_ACPI_STATUS(status); + } + /* We are only interested in opcodes that have an associated name */ if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) { diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index cd6fae6ad4c2a02948e2fdd8790bcd7c41c39b84..3f9f286088fae68ba41ebceb74bbba7c4bd23ba8 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -201,7 +201,7 @@ static int ghes_estatus_pool_expand(unsigned long len) * New allocation must be visible in all pgd before it can be found by * an NMI allocating from the pool. 
*/ - vmalloc_sync_all(); + vmalloc_sync_mappings(); return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); } diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index d21ee85ab260bad52f6bb7750d654b83dc3ee684..a75f4d9a2729d23c71d15940ae10d03ca48326e4 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -227,13 +227,13 @@ int acpi_device_set_power(struct acpi_device *device, int state) end: if (result) { dev_warn(&device->dev, "Failed to change power state to %s\n", - acpi_power_state_string(state)); + acpi_power_state_string(target_state)); } else { device->power.state = target_state; ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] transitioned to %s\n", device->pnp.bus_id, - acpi_power_state_string(state))); + acpi_power_state_string(target_state))); } return result; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 7e91575496b6703939270362b57311a50370e399..1872dc01be132092c0e0a30582e291a2433563f7 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -214,7 +214,7 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, if (call_pkg) { int i; - if (nfit_mem->family != call_pkg->nd_family) + if (nfit_mem && nfit_mem->family != call_pkg->nd_family) return -ENOTTY; for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) @@ -223,6 +223,10 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, return call_pkg->nd_command; } + /* In the !call_pkg case, bus commands == bus functions */ + if (!nfit_mem) + return cmd; + /* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */ if (nfit_mem->family == NVDIMM_FAMILY_INTEL) return cmd; @@ -238,6 +242,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); union acpi_object in_obj, in_buf, *out_obj; const struct nd_cmd_desc *desc = NULL; struct device *dev = acpi_desc->dev; @@ -252,18 +257,18 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (cmd_rc) *cmd_rc = -EINVAL; + if (cmd == ND_CMD_CALL) + call_pkg = buf; + func = cmd_to_func(nfit_mem, cmd, call_pkg); + if (func < 0) + return func; + if (nvdimm) { - struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_device *adev = nfit_mem->adev; if (!adev) return -ENOTTY; - if (cmd == ND_CMD_CALL) - call_pkg = buf; - func = cmd_to_func(nfit_mem, cmd, call_pkg); - if (func < 0) - return func; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -274,12 +279,9 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); - func = cmd; cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; - dsm_mask = cmd_mask; - if (cmd == ND_CMD_CALL) - dsm_mask = nd_desc->bus_dsm_mask; + dsm_mask = nd_desc->bus_dsm_mask; desc = nd_cmd_bus_desc(cmd); guid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 7f9aff4b8d627db3accf80a12054b698887a314d..9fdc13a2f2d5986af51e73a22a72c1df67484ece 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -909,13 +909,6 @@ static long __acpi_processor_get_throttling(void *data) return pr->throttling.acpi_processor_get_throttling(pr); } 
-static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct) -{ - if (direct || (is_percpu_thread() && cpu == smp_processor_id())) - return fn(arg); - return work_on_cpu(cpu, fn, arg); -} - static int acpi_processor_get_throttling(struct acpi_processor *pr) { if (!pr) diff --git a/drivers/android/binder.c index 920b1ca35bf03f456781fc13ade77ac747122c46..7bd038edc1f77728e1cb800e706b5c1281ef9d10 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5192,6 +5192,7 @@ static int binder_open(struct inode *nodp, struct file *filp) binder_dev = container_of(filp->private_data, struct binder_device, miscdev); } + refcount_inc(&binder_dev->ref); proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); @@ -5369,6 +5370,7 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; + struct binder_device *device; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, active_transactions; @@ -5387,6 +5389,12 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); + device = container_of(proc->context, struct binder_device, context); + if (refcount_dec_and_test(&device->ref)) { + kfree(context->name); + kfree(device); + } + proc->context = NULL; binder_inner_proc_lock(proc); /* * Make sure proc stays alive after we @@ -6058,6 +6066,7 @@ static int __init init_binder_device(const char *name) binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; binder_device->miscdev.name = name; + refcount_set(&binder_device->ref, 1); binder_device->context.binder_context_mgr_uid = INVALID_UID; binder_device->context.name = name; mutex_init(&binder_device->context.context_mgr_node_lock); diff --git a/drivers/android/binder_alloc.c index ad2d54d2023dcf110fe459df4f706910181abaf8..880affe45b07921297dc109cf99240b1fa6ddac2 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -922,8 +922,8 @@ enum lru_status binder_alloc_free_page(struct list_head *item, mm = alloc->vma_vm_mm; if (!mmget_not_zero(mm)) goto err_mmget; - if (!down_write_trylock(&mm->mmap_sem)) - goto err_down_write_mmap_sem_failed; + if (!down_read_trylock(&mm->mmap_sem)) + goto err_down_read_mmap_sem_failed; vma = binder_alloc_get_vma(alloc); list_lru_isolate(lru, item); @@ -936,7 +936,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_user_end(alloc, index); } - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); trace_binder_unmap_kernel_start(alloc, index); @@ -950,7 +950,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, mutex_unlock(&alloc->mutex); return LRU_REMOVED_RETRY; -err_down_write_mmap_sem_failed: +err_down_read_mmap_sem_failed: mmput_async(mm); err_mmget: err_page_already_freed: diff --git a/drivers/android/binder_internal.h index bd47f7f72075a9676e84380d7c2f9f875878ebca..8d0bffcc9e271fd649a65a7e0df68eee1cd7bfcd 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -8,6 +8,7 @@ #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/mutex.h> +#include <linux/refcount.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/uidgid.h> @@ -33,6 +34,7 @@ struct binder_device { struct miscdevice miscdev; struct binder_context context; struct inode *binderfs_inode; + refcount_t ref; }; /** diff --git a/drivers/android/binderfs.c 
b/drivers/android/binderfs.c index 4eca0e1fbbd8e8853f34ab5f1897405f6c001f32..c25ab8bc8b4b6af253cbafaea7bbee85d8ff3ea0 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -154,6 +154,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, if (!name) goto err; + refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; @@ -257,8 +258,10 @@ static void binderfs_evict_inode(struct inode *inode) ida_remove(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); - kfree(device->context.name); - kfree(device); + if (refcount_dec_and_test(&device->ref)) { + kfree(device->context.name); + kfree(device); + } } /** diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index f003e301723a4a21d8e3e479d7f2e37d64fa7899..0905c07b8c7eb2a690e37f48ef146a771257a1f7 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -88,6 +88,7 @@ enum board_ids { static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static void ahci_remove_one(struct pci_dev *dev); +static void ahci_shutdown_one(struct pci_dev *dev); static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, @@ -586,6 +587,7 @@ static struct pci_driver ahci_pci_driver = { .id_table = ahci_pci_tbl, .probe = ahci_init_one, .remove = ahci_remove_one, + .shutdown = ahci_shutdown_one, .driver = { .pm = &ahci_pci_pm_ops, }, @@ -1823,6 +1825,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } +static void ahci_shutdown_one(struct pci_dev *pdev) +{ + ata_pci_shutdown_one(pdev); +} + static void ahci_remove_one(struct pci_dev *pdev) { pm_runtime_get_noresume(&pdev->dev); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 08f67c109429441e95ce639772b4532c902aa26d..33eb5e342a7a9293654859c33fcad0fc9f218a5f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6706,6 +6706,26 @@ void ata_pci_remove_one(struct pci_dev *pdev) ata_host_detach(host); } +void ata_pci_shutdown_one(struct pci_dev *pdev) +{ + struct ata_host *host = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + ap->pflags |= ATA_PFLAG_FROZEN; + + /* Disable port interrupts */ + if (ap->ops->freeze) + ap->ops->freeze(ap); + + /* Stop the port DMA engines */ + if (ap->ops->port_stop) + ap->ops->port_stop(ap); + } +} + /* move to PCI subsystem */ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) { @@ -7326,6 +7346,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode); #ifdef CONFIG_PCI EXPORT_SYMBOL_GPL(pci_test_config_bits); +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); #ifdef CONFIG_PM EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend); diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 85aa76116a305eb50d77f2544c87f09914998bed..7924d0635718dc0428f1679339e7f86aa3b18ee5 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -764,6 +764,7 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap, if (dev->flags & ATA_DFLAG_DETACH) { detach = 1; + rc = -ENODEV; goto fail; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index eb0c4ee205258eeed13cda74931764f230b8a81f..2f81d65342709c8b75b5cbf37ced79d8f1bf6572 100644 --- a/drivers/ata/libata-scsi.c +++ 
b/drivers/ata/libata-scsi.c @@ -4571,22 +4571,19 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht) */ shost->max_host_blocked = 1; - rc = scsi_add_host_with_dma(ap->scsi_host, - &ap->tdev, ap->host->dev); + rc = scsi_add_host_with_dma(shost, &ap->tdev, ap->host->dev); if (rc) - goto err_add; + goto err_alloc; } return 0; - err_add: - scsi_host_put(host->ports[i]->scsi_host); err_alloc: while (--i >= 0) { struct Scsi_Host *shost = host->ports[i]->scsi_host; + /* scsi_host_put() is in ata_devres_release() */ scsi_remove_host(shost); - scsi_host_put(shost); } return rc; } diff --git a/drivers/atm/fore200e.c index f8b7e86907cc2b088ff6453b26fedaf70d206daa..0a1ad1a1d34fbc85058e607867febab32e8d906e 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1496,12 +1496,14 @@ fore200e_open(struct atm_vcc *vcc) static void fore200e_close(struct atm_vcc* vcc) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); struct fore200e_vcc* fore200e_vcc; + struct fore200e* fore200e; struct fore200e_vc_map* vc_map; unsigned long flags; ASSERT(vcc); + fore200e = FORE200E_DEV(vcc->dev); + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<<FORE200E_VPI_BITS)); ASSERT((vcc->vci >= 0) && (vcc->vci < 1<<FORE200E_VCI_BITS)); @@ -1546,10 +1548,10 @@ fore200e_close(struct atm_vcc* vcc) static int fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); - struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc); + struct fore200e* fore200e; + struct fore200e_vcc* fore200e_vcc; struct fore200e_vc_map* vc_map; - struct host_txq* txq = &fore200e->host_txq; + struct host_txq* txq; struct host_txq_entry* entry; struct tpd* tpd; struct tpd_haddr tpd_haddr; @@ -1562,9 +1564,18 @@ unsigned char* data; unsigned long flags; - ASSERT(vcc); - ASSERT(fore200e); - ASSERT(fore200e_vcc); + if (!vcc) + return -EINVAL; + + fore200e = FORE200E_DEV(vcc->dev); + fore200e_vcc = FORE200E_VCC(vcc); + + if (!fore200e) + return -EINVAL; + + txq = &fore200e->host_txq; + if (!fore200e_vcc) + return -EINVAL; if (!test_bit(ATM_VF_READY, &vcc->flags)) { DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi); diff --git a/drivers/base/dd.c index 16f8f2626c9077dc468038ca42b7006763e7ae17..e80fd3da4e87940003b412e6594a64362fb6ace4 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -401,7 +401,10 @@ static int really_probe(struct device *dev, struct device_driver *drv) atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); - WARN_ON(!list_empty(&dev->devres_head)); + if (!list_empty(&dev->devres_head)) { + dev_crit(dev, "Resources present before probing\n"); + return -EBUSY; + } re_probe: dev->driver = drv; diff --git a/drivers/base/platform.c index f1105de0d9fed2d68c5d12ce0da8b5545c509418..bcb6519fe2113fd891e82e2b22feb3f0fa329d54 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -28,6 +28,7 @@ #include <linux/acpi.h> #include <linux/clk/clk-conf.h> #include <linux/property.h> +#include <linux/types.h> #include "base.h" #include "power/power.h" @@ -68,7 +69,7 @@ void __weak arch_setup_pdev_archdata(struct platform_device *pdev) struct resource *platform_get_resource(struct platform_device *dev, unsigned int type, unsigned int num) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -163,7 +164,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev, unsigned int type, const char *name) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -360,7 +361,8 @@ 
EXPORT_SYMBOL_GPL(platform_device_add_properties); */ int platform_device_add(struct platform_device *pdev) { - int i, ret; + u32 i; + int ret; if (!pdev) return -EINVAL; @@ -426,7 +428,7 @@ int platform_device_add(struct platform_device *pdev) pdev->id = PLATFORM_DEVID_AUTO; } - while (--i >= 0) { + while (i--) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); @@ -447,7 +449,7 @@ EXPORT_SYMBOL_GPL(platform_device_add); */ void platform_device_del(struct platform_device *pdev) { - int i; + u32 i; if (pdev) { device_remove_properties(&pdev->dev); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index df97fb7cbfb59bdf11b57449e772feb160ac2ddc..f3b6afaf09c7237032f319463d13f58c8bf357d0 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1173,10 +1173,13 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a } error = dpm_run_callback(callback, dev, state, info); - if (!error) + if (!error) { dev->power.is_noirq_suspended = true; - else + } else { async_error = error; + log_suspend_abort_reason("Callback failed on %s in %pS returned %d", + dev_name(dev), callback, error); + } Complete: complete_all(&dev->power.completion); @@ -1334,10 +1337,13 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as } error = dpm_run_callback(callback, dev, state, info); - if (!error) + if (!error) { dev->power.is_late_suspended = true; - else + } else { async_error = error; + log_suspend_abort_reason("Callback failed on %s in %pS returned %d", + dev_name(dev), callback, error); + } Complete: TRACE_SUSPEND(error); @@ -1495,7 +1501,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; const char *info = NULL; int error = 0; - char suspend_abort[MAX_SUSPEND_ABORT_LEN]; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); @@ -1518,9 +1523,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { - pm_get_active_wakeup_sources(suspend_abort, - MAX_SUSPEND_ABORT_LEN); - log_suspend_abort_reason(suspend_abort); dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; @@ -1599,7 +1601,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dev->power.is_suspended = true; if (parent) { spin_lock_irq(&parent->power.lock); - dev->parent->power.direct_complete = false; if (dev->power.wakeup_path && !dev->parent->power.ignore_children) @@ -1608,6 +1609,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) spin_unlock_irq(&parent->power.lock); } dpm_clear_suppliers_direct_complete(dev); + } else { + log_suspend_abort_reason("Callback failed on %s in %pS returned %d", + dev_name(dev), callback, error); } device_unlock(dev); @@ -1817,6 +1821,8 @@ int dpm_prepare(pm_message_t state) printk(KERN_INFO "PM: Device %s not prepared " "for power transition: code %d\n", dev_name(dev), error); + log_suspend_abort_reason("Device %s not prepared for power transition: code %d", + dev_name(dev), error); dpm_save_failed_dev(dev_name(dev)); put_device(dev); break; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 3ccbf5e0eefc2b42afb85d6823b4277d137b2e29..db2a02b8b54ab52b54fa3bc67f672fe3dcd7ec48 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -15,7 +15,9 @@ #include #include #include -#include +#include +#include +#include #include #include #include 
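Editor's aside between hunks, not part of the patch: the wakeup.c change below routes suspend-abort reporting through the wakeup_reason machinery, serializing the currently active wakeup sources into a fixed-size buffer and logging that string. A self-contained sketch of the same pattern, with toy stand-ins for the two kernel helpers (their names and MAX_SUSPEND_ABORT_LEN come from the patch; the buffer size and everything else here is illustrative):

#include <stdio.h>

#define MAX_SUSPEND_ABORT_LEN 256	/* illustrative; the real value lives in wakeup_reason.h */

/* Toy stand-in: the real helper walks the wakeup-source list under RCU. */
static void pm_get_active_wakeup_sources(char *buf, int len)
{
	snprintf(buf, len, "Abort: Pending Wakeup Sources: alarmtimer eventpoll");
}

/* Toy stand-in: the real helper records into the wakeup_reasons log. */
static void log_suspend_abort_reason(const char *reason)
{
	printf("suspend abort: %s\n", reason);
}

int main(void)
{
	char suspend_abort[MAX_SUSPEND_ABORT_LEN];

	pm_get_active_wakeup_sources(suspend_abort, MAX_SUSPEND_ABORT_LEN);
	log_suspend_abort_reason(suspend_abort);
	return 0;
}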
@@ -929,6 +931,7 @@ bool pm_wakeup_pending(void) { unsigned long flags; bool ret = false; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; spin_lock_irqsave(&events_lock, flags); if (events_check_enabled) { @@ -941,8 +944,10 @@ bool pm_wakeup_pending(void) spin_unlock_irqrestore(&events_lock, flags); if (ret) { - pr_info("PM: Wakeup pending, aborting suspend\n"); - pm_print_active_wakeup_sources(); + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); + pr_info("PM: %s\n", suspend_abort); } return ret || atomic_read(&pm_abort_suspend) > 0; diff --git a/drivers/block/brd.c index 16965964873e9a8a26d43d907d282cc9cba09d1d..a34e36d3bc2737c73ef07df92238e2d959164d64 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -531,6 +531,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) return kobj; } +static inline void brd_check_and_reset_par(void) +{ + if (unlikely(!max_part)) + max_part = 1; + + /* + * make sure (1U << MINORBITS) can be divided exactly by 'max_part', + * otherwise it is possible to get the same dev_t when adding partitions. + */ + if ((1U << MINORBITS) % max_part != 0) + max_part = 1UL << fls(max_part); + + if (max_part > DISK_MAX_PARTS) { + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", + DISK_MAX_PARTS, DISK_MAX_PARTS); + max_part = DISK_MAX_PARTS; + } +} + static int __init brd_init(void) { struct brd_device *brd, *next; @@ -554,8 +573,7 @@ static int __init brd_init(void) if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) return -EIO; - if (unlikely(!max_part)) - max_part = 1; + brd_check_and_reset_par(); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); diff --git a/drivers/block/floppy.c index 5f1aa31972441a703b7411d704eb4ec6e616be96..cbf74731cfce633e369ffba343ea087df8d2a9f5 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -848,14 +848,17 @@ static void reset_fdc_info(int mode) /* selects the fdc and drive, and enables the fdc's input/dma. 
*/ static void set_fdc(int drive) { + unsigned int new_fdc = fdc; + if (drive >= 0 && drive < N_DRIVE) { - fdc = FDC(drive); + new_fdc = FDC(drive); current_drive = drive; } - if (fdc != 1 && fdc != 0) { + if (new_fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } + fdc = new_fdc; set_dor(fdc, ~0, 8); #if N_FDC > 1 set_dor(1 - fdc, ~8, 0); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 453e3728e65736e45604e8c68290aa387d9a2bff..39b119af65f72c57e90e30641dc9e8cb6a5c0da3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -211,7 +211,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup * will get updated by ioctl(LOOP_GET_STATUS) */ - blk_mq_freeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; if (use_dio) { queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); @@ -220,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } - blk_mq_unfreeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_unfreeze_queue(lo->lo_queue); } static int @@ -1378,16 +1380,16 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; - if (lo->lo_queue->limits.logical_block_size != arg) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + if (lo->lo_queue->limits.logical_block_size == arg) + return 0; + + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { + if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, @@ -1615,6 +1617,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, arg = (unsigned long) compat_ptr(arg); case LOOP_SET_FD: case LOOP_CHANGE_FD: + case LOOP_SET_BLOCK_SIZE: case LOOP_SET_DIRECT_IO: err = lo_ioctl(bdev, mode, cmd, arg); break; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4c661ad91e7d32796592e8a159869a753c374971..8f56e6b2f114f17fb49d5824f22987097f7e0524 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1203,6 +1203,16 @@ static int nbd_start_device(struct nbd_device *nbd) args = kzalloc(sizeof(*args), GFP_KERNEL); if (!args) { sock_shutdown(nbd); + /* + * If num_connections is m (2 < m), + * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful. + * But NO.(n + 1) failed. We still have n recv threads. + * So, add flush_workqueue here to prevent recv threads + * dropping the last config_refs and trying to destroy + * the workqueue from inside the workqueue. 
+ */ + if (i) + flush_workqueue(nbd->recv_workq); return -ENOMEM; } sk_set_memalloc(config->socks[i]->sock->sk); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index f01d4a8a783ace1d03157d64c419e289d62f8920..b12e373aa956ade22fbe12434469dcae998cc3ee 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -622,6 +622,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) if (tag != -1U) { cmd = &nq->cmds[tag]; cmd->tag = tag; + cmd->error = BLK_STS_OK; cmd->nq = nq; if (nq->dev->irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, @@ -1399,6 +1400,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->timer.function = null_cmd_timer_expired; } cmd->rq = bd->rq; + cmd->error = BLK_STS_OK; cmd->nq = nq; blk_mq_start_request(bd->rq); @@ -1593,7 +1595,12 @@ static void null_nvm_unregister(struct nullb *nullb) {} static void null_del_dev(struct nullb *nullb) { - struct nullb_device *dev = nullb->dev; + struct nullb_device *dev; + + if (!nullb) + return; + + dev = nullb->dev; ida_simple_remove(&nullb_indexes, nullb->index); @@ -1919,6 +1926,7 @@ static int null_add_dev(struct nullb_device *dev) cleanup_queues(nullb); out_free_nullb: kfree(nullb); + dev->nullb = NULL; out: return rv; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f2b1994d58a067e9b81bbd30af9327fcf7f601fe..557cf52f674b5e3ffbe723bf7acf8a6b8ca7a08a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3847,6 +3847,10 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev) cancel_work_sync(&rbd_dev->unlock_work); } +/* + * header_rwsem must not be held to avoid a deadlock with + * rbd_dev_refresh() when flushing notifies. + */ static void rbd_unregister_watch(struct rbd_device *rbd_dev) { WARN_ON(waitqueue_active(&rbd_dev->lock_waitq)); @@ -6044,9 +6048,10 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) static void rbd_dev_image_release(struct rbd_device *rbd_dev) { - rbd_dev_unprobe(rbd_dev); if (rbd_dev->opts) rbd_unregister_watch(rbd_dev); + + rbd_dev_unprobe(rbd_dev); rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -6057,6 +6062,9 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) * device. If this image is the one being mapped (i.e., not a * parent), initiate a watch on its header object before using that * object to get detailed information about the rbd image. + * + * On success, returns with header_rwsem held for write if called + * with @depth == 0. 
*/ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) { @@ -6087,9 +6095,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) } } + if (!depth) + down_write(&rbd_dev->header_rwsem); + ret = rbd_dev_header_info(rbd_dev); if (ret) - goto err_out_watch; + goto err_out_probe; /* * If this image is the one being mapped, we have pool name and @@ -6133,10 +6144,11 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return 0; err_out_probe: - rbd_dev_unprobe(rbd_dev); -err_out_watch: + if (!depth) + up_write(&rbd_dev->header_rwsem); if (!depth) rbd_unregister_watch(rbd_dev); + rbd_dev_unprobe(rbd_dev); err_out_format: rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); @@ -6194,12 +6206,9 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_rbd_dev; } - down_write(&rbd_dev->header_rwsem); rc = rbd_dev_image_probe(rbd_dev, 0); - if (rc < 0) { - up_write(&rbd_dev->header_rwsem); + if (rc < 0) goto err_out_rbd_dev; - } /* If we are mapping a snapshot it must be marked read-only */ diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index db7b36fd8d8544eef9ffc735425b31aaf51630f3..2531bfb57fdc14a46c25b62a979168d8995a5d2f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -364,10 +364,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); - blk_mq_stop_hw_queue(hctx); + /* Don't stop the queue if -ENOMEM: we may have failed to + * bounce the buffer due to global resource outage. + */ + if (err == -ENOSPC) + blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); - /* Out of mem doesn't actually happen, since we fall back - * to direct descriptors */ if (err == -ENOMEM || err == -ENOSPC) return BLK_STS_RESOURCE; return BLK_STS_IOERR; diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 1b76d958590275daa6b9c2f8c69ecd2d51655da7..2ca2cc56bcef6f849c98d465a59e43ad62214a10 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -345,7 +345,7 @@ static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr, if (ret) goto unlock; - *buf = readl(rsb->regs + RSB_DATA); + *buf = readl(rsb->regs + RSB_DATA) & GENMASK(len * 8 - 1, 0); unlock: mutex_unlock(&rsb->lock); diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 88db42d30760645d84cd8e5f4ecd70f1239457b9..48194d1a607673c7605cbcf4e1d6f26f707a231e 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -110,8 +110,10 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) return -ETIMEDOUT; } - if (rngc->err_reg != 0) + if (rngc->err_reg != 0) { + imx_rngc_irq_mask_clear(rngc); return -EIO; + } return 0; } diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c82d9fd2f05af6c787ab640a88dd4a7b47717381..f72a272eeb9b28ce13ef9f20f03a23baed15ecbe 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2647,7 +2647,9 @@ get_guid(ipmi_smi_t intf) if (rv) /* Send failed, no GUID available. 
*/ intf->bmc->guid_set = 0; - wait_event(intf->waitq, intf->bmc->guid_set != 2); + else + wait_event(intf->waitq, intf->bmc->guid_set != 2); + intf->null_user_handler = NULL; } diff --git a/drivers/char/ipmi/ipmi_ssif.c index 941bffd9b49cd53791831fffe39ecc62b6fc62cc..0146bc3252c5abcd3853190b38609c8d37b5f631 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -750,10 +750,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); msg = ssif_info->curr_msg; if (msg) { + if (data) { + if (len > IPMI_MAX_MSG_LENGTH) + len = IPMI_MAX_MSG_LENGTH; + memcpy(msg->rsp, data, len); + } else { + len = 0; + } msg->rsp_size = len; - if (msg->rsp_size > IPMI_MAX_MSG_LENGTH) - msg->rsp_size = IPMI_MAX_MSG_LENGTH; - memcpy(msg->rsp, data, msg->rsp_size); ssif_info->curr_msg = NULL; } diff --git a/drivers/char/random.c index 07d7e0ec393b270707348e7965e37f653463c697..7ac94875300735d30378fee47c26299caa42f43b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2192,11 +2192,11 @@ struct batched_entropy { /* * Get a random word for internal kernel use only. The quality of the random - * number is either as good as RDRAND or as good as /dev/urandom, with the - * goal of being quite fast and not depleting entropy. In order to ensure + * number is as good as /dev/urandom, but there is no backtrack protection, with + * the goal of being quite fast and not depleting entropy. In order to ensure * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once - * at any point prior. + * wait_for_random_bytes() should be called and return 0 at least once at any + * point prior. */ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), @@ -2209,15 +2209,6 @@ u64 get_random_u64(void) struct batched_entropy *batch; static void *previous; -#if BITS_PER_LONG == 64 - if (arch_get_random_long((unsigned long *)&ret)) - return ret; -#else - if (arch_get_random_long((unsigned long *)&ret) && - arch_get_random_long((unsigned long *)&ret + 1)) - return ret; -#endif - warn_unseeded_randomness(&previous); batch = raw_cpu_ptr(&batched_entropy_u64); @@ -2242,9 +2233,6 @@ u32 get_random_u32(void) struct batched_entropy *batch; static void *previous; - if (arch_get_random_int(&ret)) - return ret; - warn_unseeded_randomness(&previous); batch = raw_cpu_ptr(&batched_entropy_u32); diff --git a/drivers/char/tpm/tpm_ibmvtpm.c index 77e47dc5aacc5705b465583d2c02f26f8b393d56..569e93e1f06ccdc978d30ee6035aa850dc2f5e72 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 IBM Corporation + * Copyright (C) 2012-2020 IBM Corporation * * Author: Ashley Lai * @@ -140,6 +140,64 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) return len; } +/** + * ibmvtpm_crq_send_init - Send a CRQ initialize message + * @ibmvtpm: vtpm device struct + * + * Return: + * 0 on success. + * Non-zero on failure. 
+ */ +static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm) +{ + int rc; + + rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_CMD); + if (rc != H_SUCCESS) + dev_err(ibmvtpm->dev, + "%s failed rc=%d\n", __func__, rc); + + return rc; +} + +/** + * tpm_ibmvtpm_resume - Resume from suspend + * + * @dev: device struct + * + * Return: Always 0. + */ +static int tpm_ibmvtpm_resume(struct device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); + int rc = 0; + + do { + if (rc) + msleep(100); + rc = plpar_hcall_norets(H_ENABLE_CRQ, + ibmvtpm->vdev->unit_address); + } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + if (rc) { + dev_err(dev, "Error enabling ibmvtpm rc=%d\n", rc); + return rc; + } + + rc = vio_enable_interrupts(ibmvtpm->vdev); + if (rc) { + dev_err(dev, "Error vio_enable_interrupts rc=%d\n", rc); + return rc; + } + + rc = ibmvtpm_crq_send_init(ibmvtpm); + if (rc) + dev_err(dev, "Error send_init rc=%d\n", rc); + + return rc; +} + /** * tpm_ibmvtpm_send() - Send a TPM command * @chip: tpm chip struct @@ -153,6 +211,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) { struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); + bool retry = true; int rc, sig; if (!ibmvtpm->rtce_buf) { @@ -186,18 +245,27 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) */ ibmvtpm->tpm_processing_cmd = true; +again: rc = ibmvtpm_send_crq(ibmvtpm->vdev, IBMVTPM_VALID_CMD, VTPM_TPM_COMMAND, count, ibmvtpm->rtce_dma_handle); if (rc != H_SUCCESS) { + /* + * H_CLOSED can be returned after LPM resume. Call + * tpm_ibmvtpm_resume() to re-enable the CRQ then retry + * ibmvtpm_send_crq() once before failing. + */ + if (rc == H_CLOSED && retry) { + tpm_ibmvtpm_resume(ibmvtpm->dev); + retry = false; + goto again; + } dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc); - rc = 0; ibmvtpm->tpm_processing_cmd = false; - } else - rc = 0; + } spin_unlock(&ibmvtpm->rtce_lock); - return rc; + return 0; } static void tpm_ibmvtpm_cancel(struct tpm_chip *chip) @@ -275,26 +343,6 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm) return rc; } -/** - * ibmvtpm_crq_send_init - Send a CRQ initialize message - * @ibmvtpm: vtpm device struct - * - * Return: - * 0 on success. - * Non-zero on failure. - */ -static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm) -{ - int rc; - - rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_CMD); - if (rc != H_SUCCESS) - dev_err(ibmvtpm->dev, - "ibmvtpm_crq_send_init failed rc=%d\n", rc); - - return rc; -} - /** * tpm_ibmvtpm_remove - ibm vtpm remove entry point * @vdev: vio device struct @@ -407,44 +455,6 @@ static int ibmvtpm_reset_crq(struct ibmvtpm_dev *ibmvtpm) ibmvtpm->crq_dma_handle, CRQ_RES_BUF_SIZE); } -/** - * tpm_ibmvtpm_resume - Resume from suspend - * - * @dev: device struct - * - * Return: Always 0. 
- */ -static int tpm_ibmvtpm_resume(struct device *dev) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); - int rc = 0; - - do { - if (rc) - msleep(100); - rc = plpar_hcall_norets(H_ENABLE_CRQ, - ibmvtpm->vdev->unit_address); - } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc)); - - if (rc) { - dev_err(dev, "Error enabling ibmvtpm rc=%d\n", rc); - return rc; - } - - rc = vio_enable_interrupts(ibmvtpm->vdev); - if (rc) { - dev_err(dev, "Error vio_enable_interrupts rc=%d\n", rc); - return rc; - } - - rc = ibmvtpm_crq_send_init(ibmvtpm); - if (rc) - dev_err(dev, "Error send_init rc=%d\n", rc); - - return rc; -} - static bool tpm_ibmvtpm_req_canceled(struct tpm_chip *chip, u8 status) { return (status == 0); diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index a7d9c0c53fcd0976e2870347ba69b4f82b602eb9..9b1116501f209fe19220d81d0a04080e47a51266 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -331,6 +331,9 @@ static void disable_interrupts(struct tpm_chip *chip) u32 intmask; int rc; + if (priv->irq == 0) + return; + rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask); if (rc < 0) intmask = 0; @@ -874,9 +877,12 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, if (irq) { tpm_tis_probe_irq_single(chip, intmask, IRQF_SHARED, irq); - if (!(chip->flags & TPM_CHIP_FLAG_IRQ)) + if (!(chip->flags & TPM_CHIP_FLAG_IRQ)) { dev_err(&chip->dev, FW_BUG "TPM interrupt not working, polling instead\n"); + + disable_interrupts(chip); + } } else { tpm_tis_probe_irq(chip, intmask); } diff --git a/drivers/clk/at91/clk-usb.c b/drivers/clk/at91/clk-usb.c index 791770a563fcce83d5538a52b5cc86bcc7a925c7..6fac6383d024ea7ff88b58f1cbf22225af6c3692 100644 --- a/drivers/clk/at91/clk-usb.c +++ b/drivers/clk/at91/clk-usb.c @@ -78,6 +78,9 @@ static int at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw, tmp_parent_rate = req->rate * div; tmp_parent_rate = clk_hw_round_rate(parent, tmp_parent_rate); + if (!tmp_parent_rate) + continue; + tmp_rate = DIV_ROUND_CLOSEST(tmp_parent_rate, div); if (tmp_rate < req->rate) tmp_diff = req->rate - tmp_rate; diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index eaafc038368f57c8368f86ade2a488def08fe9f9..183985c8c9babfb12afa93587ddbf45534665ffc 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -884,11 +884,26 @@ static const struct sunxi_ccu_desc sun50i_a64_ccu_desc = { .num_resets = ARRAY_SIZE(sun50i_a64_ccu_resets), }; +static struct ccu_pll_nb sun50i_a64_pll_cpu_nb = { + .common = &pll_cpux_clk.common, + /* copy from pll_cpux_clk */ + .enable = BIT(31), + .lock = BIT(28), +}; + +static struct ccu_mux_nb sun50i_a64_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, /* > 8 clock cycles at 24 MHz */ + .bypass_index = 1, /* index of 24 MHz oscillator */ +}; + static int sun50i_a64_ccu_probe(struct platform_device *pdev) { struct resource *res; void __iomem *reg; u32 val; + int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(&pdev->dev, res); @@ -902,7 +917,18 @@ static int sun50i_a64_ccu_probe(struct platform_device *pdev) writel(0x515, reg + SUN50I_A64_PLL_MIPI_REG); - return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + if (ret) + return ret; + + /* Gate 
then ungate PLL CPU after any rate changes */ + ccu_pll_notifier_register(&sun50i_a64_pll_cpu_nb); + + /* Reparent CPU during PLL CPU rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_a64_cpu_nb); + + return 0; } static const struct of_device_id sun50i_a64_ccu_ids[] = { diff --git a/drivers/clk/tegra/clk-tegra-pmc.c b/drivers/clk/tegra/clk-tegra-pmc.c index a35579a3f884fbd3fa920e6d30950144c385c3f2..476dab494c44d3d5ea802ea863281fe585ae6edc 100644 --- a/drivers/clk/tegra/clk-tegra-pmc.c +++ b/drivers/clk/tegra/clk-tegra-pmc.c @@ -60,16 +60,16 @@ struct pmc_clk_init_data { static DEFINE_SPINLOCK(clk_out_lock); -static const char *clk_out1_parents[] = { "clk_m", "clk_m_div2", - "clk_m_div4", "extern1", +static const char *clk_out1_parents[] = { "osc", "osc_div2", + "osc_div4", "extern1", }; -static const char *clk_out2_parents[] = { "clk_m", "clk_m_div2", - "clk_m_div4", "extern2", +static const char *clk_out2_parents[] = { "osc", "osc_div2", + "osc_div4", "extern2", }; -static const char *clk_out3_parents[] = { "clk_m", "clk_m_div2", - "clk_m_div4", "extern3", +static const char *clk_out3_parents[] = { "osc", "osc_div2", + "osc_div4", "extern3", }; static struct pmc_clk_init_data pmc_clks[] = { diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 39e489a96ad74f0f8e341aa2b3f9c8e6acfc4bfe..8894cfc32be06b62f0c0d1eaff4f2632d6ffd89f 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -134,7 +134,7 @@ static int __init bcm2835_timer_init(struct device_node *node) ret = setup_irq(irq, &timer->act); if (ret) { pr_err("Can't set up timer IRQ\n"); - goto err_iounmap; + goto err_timer_free; } clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff); @@ -143,6 +143,9 @@ static int __init bcm2835_timer_init(struct device_node *node) return 0; +err_timer_free: + kfree(timer); + err_iounmap: iounmap(base); return ret; diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 33854bf127f92bce16e0573ec3b5c1dba8709c20..25c9a6cdd8614503eb8a47fde088b86c96f6402f 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1041,6 +1041,12 @@ static int init_chip_info(void) static inline void clean_chip_info(void) { + int i; + + /* flush any pending work items */ + if (chips) + for (i = 0; i < nr_chips; i++) + cancel_work_sync(&chips[i].throttle); kfree(chips); } diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index b23c7b72525c0f64d1a23dcb3717e874b4b627f3..a3d507fb9ea5ab4324b757bcb1015cb88f88d668 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1280,7 +1280,13 @@ EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_givencap); */ void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. 
+ */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); @@ -1332,7 +1338,13 @@ EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_encap); */ void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. + */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index e1e1e81107904f5304cff08956c1a0935a2edb4c..eb569cf0630955f4dfa378fc3927b2a1d788c44e 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -25,6 +25,7 @@ #include #include #include +#include #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -36,11 +37,11 @@ * Null hashes to align with hw behavior on imx6sl and ull * these are flipped for consistency with hw output */ -const uint8_t sha1_null_hash[] = +static const uint8_t sha1_null_hash[] = "\x09\x07\xd8\xaf\x90\x18\x60\x95\xef\xbf" "\x55\x32\x0d\x4b\x6b\x5e\xee\xa3\x39\xda"; -const uint8_t sha256_null_hash[] = +static const uint8_t sha256_null_hash[] = "\x55\xb8\x52\x78\x1b\x99\x95\xa4" "\x4c\x93\x9b\x64\xe4\x41\xae\x27" "\x24\xb9\x6f\x99\xc8\xf4\xfb\x9a" @@ -621,49 +622,46 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; uint8_t *out_buf = sdcp->coh->sha_out_buf; - uint8_t *src_buf; - struct scatterlist *src; - unsigned int i, len, clen; + unsigned int i, len, clen, oft = 0; int ret; int fin = rctx->fini; if (fin) rctx->fini = 0; - for_each_sg(req->src, src, nents, i) { - src_buf = sg_virt(src); - len = sg_dma_len(src); - - do { - if (actx->fill + len > DCP_BUF_SZ) - clen = DCP_BUF_SZ - actx->fill; - else - clen = len; - - memcpy(in_buf + actx->fill, src_buf, clen); - len -= clen; - src_buf += clen; - actx->fill += clen; + src = req->src; + len = req->nbytes; - /* - * If we filled the buffer and still have some - * more data, submit the buffer. - */ - if (len && actx->fill == DCP_BUF_SZ) { - ret = mxs_dcp_run_sha(req); - if (ret) - return ret; - actx->fill = 0; - rctx->init = 0; - } - } while (len); + while (len) { + if (actx->fill + len > DCP_BUF_SZ) + clen = DCP_BUF_SZ - actx->fill; + else + clen = len; + + scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen, + 0); + + len -= clen; + oft += clen; + actx->fill += clen; + + /* + * If we filled the buffer and still have some + * more data, submit the buffer. 
+ */ + if (len && actx->fill == DCP_BUF_SZ) { + ret = mxs_dcp_run_sha(req); + if (ret) + return ret; + actx->fill = 0; + rctx->init = 0; + } } if (fin) { diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 70d8bcb2a93e8d2473d74529e0f815077cfa913c..813417c57703a59f1c6369fa49c58f9b0aebf464 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -203,7 +203,8 @@ config ARM_TEGRA_DEVFREQ config ARM_RK3399_DMC_DEVFREQ tristate "ARM RK3399 DMC DEVFREQ Driver" - depends on ARCH_ROCKCHIP + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ + (COMPILE_TEST && HAVE_ARM_SMCCC) select DEVFREQ_EVENT_ROCKCHIP_DFI select DEVFREQ_GOV_SIMPLE_ONDEMAND select PM_DEVFREQ_EVENT diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index cd949800eed962cffa34b599d3ce66f386763d11..8851bc4e8e3e17935b6fc109ff926722c3d68737 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -33,7 +33,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU config DEVFREQ_EVENT_ROCKCHIP_DFI tristate "ROCKCHIP DFI DEVFREQ event Driver" - depends on ARCH_ROCKCHIP + depends on ARCH_ROCKCHIP || COMPILE_TEST help This add the devfreq-event driver for Rockchip SoC. It provides DFI (DDR Monitor Module) driver to count ddr load. diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 6d7d2d54eacf8446156ed44c4598ba6911737e07..f0932f25a9b1ff03e0d8df782f4ddbdd26bd1000 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1944,8 +1944,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) return; } - spin_lock(&cohc->lock); - /* * When we reach this point, at least one queue item * should have been moved over from cohc->queue to @@ -1966,8 +1964,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) if (coh901318_queue_start(cohc) == NULL) cohc->busy = 0; - spin_unlock(&cohc->lock); - /* * This tasklet will remove items from cohc->active * and thus terminates them. 
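
The mxs-dcp hunk above replaces the sg_virt()-based scatterlist walk with scatterwalk_map_and_copy(), accumulating request bytes into a fixed staging buffer and submitting the buffer each time it fills while input remains. A minimal sketch of that accumulate-and-flush loop in plain C, with hypothetical names — submit_buf() stands in for mxs_dcp_run_sha(), BUF_SZ for DCP_BUF_SZ, and memcpy() for the scatterwalk copy:

#include <stdio.h>
#include <string.h>

#define BUF_SZ 8	/* stand-in for DCP_BUF_SZ */

static char staging[BUF_SZ];
static size_t fill;	/* stand-in for actx->fill */

static int submit_buf(void)
{
	printf("submit %zu bytes: %.*s\n", fill, (int)fill, staging);
	return 0;
}

static int feed(const char *src, size_t len)
{
	size_t oft = 0, clen;
	int ret;

	while (len) {
		/* Copy no more than the space left in the buffer. */
		clen = (fill + len > BUF_SZ) ? BUF_SZ - fill : len;
		memcpy(staging + fill, src + oft, clen);
		len -= clen;
		oft += clen;
		fill += clen;

		/* Buffer full but more data pending: flush and refill. */
		if (len && fill == BUF_SZ) {
			ret = submit_buf();
			if (ret)
				return ret;
			fill = 0;
		}
	}
	return 0;
}

int main(void)
{
	feed("abcdefghijKLMNOPQRST", 20);
	if (fill)
		submit_buf();	/* final partial buffer */
	return 0;
}

Leaving the final partial buffer unsubmitted inside the loop mirrors the driver, where the tail is handled separately under rctx->fini.
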
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b451354735d3d6b80b003f8e6c3ea098d37041e0..faaaf10311ec0b80a48af6910e035cb9051e5b8d 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -192,7 +192,7 @@ __dma_device_satisfies_mask(struct dma_device *device, static struct module *dma_chan_to_owner(struct dma_chan *chan) { - return chan->device->dev->driver->owner; + return chan->device->owner; } /** @@ -928,6 +928,8 @@ int dma_async_device_register(struct dma_device *device) return -EIO; } + device->owner = device->dev->driver->owner; + if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index e393361277415ef6effbcccd45b92edef68653cf..d19a602beebd15a6b7510b03649f2b661f0d2607 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -552,8 +552,8 @@ static int dmatest_func(void *data) flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; ktime = ktime_get(); - while (!kthread_should_stop() - && !(params->iterations && total_tests >= params->iterations)) { + while (!(kthread_should_stop() || + (params->iterations && total_tests >= params->iterations))) { struct dma_async_tx_descriptor *tx = NULL; struct dmaengine_unmap_data *um; dma_addr_t srcs[src_cnt]; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 7db2766b5fe9e322994b6f5201ec966b671f6994..3402494cadf999a4bba5faf0e0dec5ba4733a404 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -288,7 +288,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get( /* Do not allocate if desc are waiting for ack */ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { - if (async_tx_test_ack(&dma_desc->txd)) { + if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) { list_del(&dma_desc->node); spin_unlock_irqrestore(&tdc->lock, flags); dma_desc->txd.flags = 0; @@ -755,10 +755,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) bool was_busy; spin_lock_irqsave(&tdc->lock, flags); - if (list_empty(&tdc->pending_sg_req)) { - spin_unlock_irqrestore(&tdc->lock, flags); - return 0; - } if (!tdc->busy) goto skip_dma_stop; diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 40fb0e7ff8fd9c018c4db831e8ba21d4df942b4f..b36abd2537863f15aa100ea2cad9627327b144b2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2863,6 +2863,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; + dimm->grain = 64; } } diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f50072b51aefbab846359786d01e26d534d80b96..b39b7e6d4e4dc1a6bcbde746472714b82f5ad521 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -550,7 +550,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, } } - if (efi_enabled(EFI_MEMMAP)) + if (!IS_ENABLED(CONFIG_X86_32) && efi_enabled(EFI_MEMMAP)) efi_memattr_init(); /* Parse the EFI Properties table if it exists */ diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 3e626fd9bd4e1fafe6e0f4da4597ed66a978bd08..1c65f5ac43686cea83939ad3672ca29ba00278f1 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -139,13 +139,16 @@ static ssize_t efivar_attr_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; 
+ unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; if (var->Attributes & EFI_VARIABLE_NON_VOLATILE) @@ -172,13 +175,16 @@ static ssize_t efivar_size_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; str += sprintf(str, "0x%lx\n", var->DataSize); @@ -189,12 +195,15 @@ static ssize_t efivar_data_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; memcpy(buf, var->Data, var->DataSize); @@ -263,6 +272,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) u8 *data; int err; + if (!entry || !buf) + return -EINVAL; + if (is_compat()) { struct compat_efi_variable *compat; @@ -314,14 +326,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; struct compat_efi_variable *compat; + unsigned long datasize = sizeof(var->Data); size_t size; + int ret; if (!entry || !buf) return 0; - var->DataSize = 1024; - if (efivar_entry_get(entry, &entry->var.Attributes, - &entry->var.DataSize, entry->var.Data)) + ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data); + var->DataSize = datasize; + if (ret) return -EIO; if (is_compat()) { diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c index 6544a16ab02e9b94f836d03e84757bc85a3ca711..7541bd327e6c5b332f12c84f5f74e737a76cd9ee 100644 --- a/drivers/gpio/gpio-grgpio.c +++ b/drivers/gpio/gpio-grgpio.c @@ -259,17 +259,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq, lirq->irq = irq; uirq = &priv->uirqs[lirq->index]; if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ret = request_irq(uirq->uirq, grgpio_irq_handler, 0, dev_name(priv->dev), priv); if (ret) { dev_err(priv->dev, "Could not request underlying irq %d\n", uirq->uirq); - - spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); - return ret; } + spin_lock_irqsave(&priv->gc.bgpio_lock, flags); } uirq->refcnt++; @@ -315,8 +314,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) if (index >= 0) { uirq = &priv->uirqs[lirq->index]; uirq->refcnt--; - if (uirq->refcnt == 0) + if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); free_irq(uirq->uirq, priv); + return; + } } spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index c7b9125c8ec229e7b9871c63665751177fd54bdc..7c06f4541c5d057ff9873a066e923a95b7591cb9 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -24,18 +24,21 @@ #include "gpiolib.h" -#define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l -#define QUIRK_NO_WAKEUP 
0x02l - static int run_edge_events_on_boot = -1; module_param(run_edge_events_on_boot, int, 0444); MODULE_PARM_DESC(run_edge_events_on_boot, "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto"); -static int honor_wakeup = -1; -module_param(honor_wakeup, int, 0444); -MODULE_PARM_DESC(honor_wakeup, - "Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto"); +static char *ignore_wake; +module_param(ignore_wake, charp, 0444); +MODULE_PARM_DESC(ignore_wake, + "controller@pin combos on which to ignore the ACPI wake flag " + "ignore_wake=controller@pin[,controller@pin[,...]]"); + +struct acpi_gpiolib_dmi_quirk { + bool no_edge_events_on_boot; + char *ignore_wake; +}; /** * struct acpi_gpio_event - ACPI GPIO event handler data @@ -262,6 +265,57 @@ static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio) acpi_gpiochip_request_irq(acpi_gpio, event); } +static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in) +{ + const char *controller, *pin_str; + int len, pin; + char *endp; + + controller = ignore_wake; + while (controller) { + pin_str = strchr(controller, '@'); + if (!pin_str) + goto err; + + len = pin_str - controller; + if (len == strlen(controller_in) && + strncmp(controller, controller_in, len) == 0) { + pin = simple_strtoul(pin_str + 1, &endp, 10); + if (*endp != 0 && *endp != ',') + goto err; + + if (pin == pin_in) + return true; + } + + controller = strchr(controller, ','); + if (controller) + controller++; + } + + return false; +err: + pr_err_once("Error invalid value for gpiolib_acpi.ignore_wake: %s\n", + ignore_wake); + return false; +} + +static bool acpi_gpio_irq_is_wake(struct device *parent, + struct acpi_resource_gpio *agpio) +{ + int pin = agpio->pin_table[0]; + + if (agpio->wake_capable != ACPI_WAKE_CAPABLE) + return false; + + if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) { + dev_info(parent, "Ignoring wakeup on pin %d\n", pin); + return false; + } + + return true; +} + static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, void *context) { @@ -347,7 +401,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, event->handle = evt_handle; event->handler = handler; event->irq = irq; - event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE; + event->irq_is_wake = acpi_gpio_irq_is_wake(chip->parent, agpio); event->pin = pin; event->desc = desc; @@ -1331,7 +1385,9 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), }, - .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .no_edge_events_on_boot = true, + }, }, { /* @@ -1344,16 +1400,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), }, - .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .no_edge_events_on_boot = true, + }, }, { /* - * Various HP X2 10 Cherry Trail models use an external - * embedded-controller connected via I2C + an ACPI GPIO - * event handler. The embedded controller generates various - * spurious wakeup events when suspended. So disable wakeup - * for its handler (it uses the only ACPI GPIO event handler). 
- * This breaks wakeup when opening the lid, the user needs + * HP X2 10 models with Cherry Trail SoC + TI PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FF:01 pin 0, causing spurious wakeups. + * When suspending by closing the LID, the power to the USB + * keyboard is turned off, causing INT0002 ACPI events to + * trigger once the XHCI controller notices the keyboard is + * gone. So INT0002 events cause spurious wakeups too. Ignoring + * EC wakes breaks wakeup when opening the lid, the user needs * to press the power-button to wakeup the system. The * alternative is suspend simply not working, which is worse. */ @@ -1361,33 +1421,61 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), }, - .driver_data = (void *)QUIRK_NO_WAKEUP, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FF:01@0,INT0002:00@2", + }, + }, + { + /* + * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FC:02 pin 28, causing spurious wakeups. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_MATCH(DMI_BOARD_NAME, "815D"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FC:02@28", + }, + }, + { + /* + * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FF:01 pin 0, causing spurious wakeups. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_MATCH(DMI_BOARD_NAME, "813E"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FF:01@0", + }, }, {} /* Terminating entry */ }; static int acpi_gpio_setup_params(void) { + const struct acpi_gpiolib_dmi_quirk *quirk = NULL; const struct dmi_system_id *id; - long quirks = 0; id = dmi_first_match(gpiolib_acpi_quirks); if (id) - quirks = (long)id->driver_data; + quirk = id->driver_data; if (run_edge_events_on_boot < 0) { - if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT) + if (quirk && quirk->no_edge_events_on_boot) run_edge_events_on_boot = 0; else run_edge_events_on_boot = 1; } - if (honor_wakeup < 0) { - if (quirks & QUIRK_NO_WAKEUP) - honor_wakeup = 0; - else - honor_wakeup = 1; - } + if (ignore_wake == NULL && quirk && quirk->ignore_wake) + ignore_wake = quirk->ignore_wake; return 0; } diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index f4ebf7783ffc0d7927a629c9adda06f9ec4eb685..3c9f24d7b2ef982b50d1cf3319aa15525d42a21f 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_TEGRA_HOST1X) += host1x/ obj-y += drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ obj-$(CONFIG_QCOM_KGSL) += msm/ +obj-$(CONFIG_TRACE_GPU_MEM) += trace/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index cc4e18dcd8b6f96abf3eade8a91cbe43ae4660c4..2153f19e59ccced533dd3da19c480054a70e557c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -336,17 +336,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * path_size += le16_to_cpu(path->usSize); if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, 
con_obj_type; - - con_obj_id = + uint8_t con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - con_obj_num = - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) - >> ENUM_ID_SHIFT; - con_obj_type = - (le16_to_cpu(path->usConnObjectId) & - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; /* Skip TV/CV support */ if ((le16_to_cpu(path->usDeviceTag) == @@ -371,15 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_id, grph_obj_num, grph_obj_type; - - grph_obj_id = - (le16_to_cpu(path->usGraphicObjIds[j]) & - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - grph_obj_num = - (le16_to_cpu(path->usGraphicObjIds[j]) & - ENUM_ID_MASK) >> ENUM_ID_SHIFT; - grph_obj_type = + uint8_t grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ff7d4827385e4d96ff3fca35161d77c40bfb86ff..7a2366bd1fbaf1f03d74ca5c89201296cb82b856 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -279,7 +279,12 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) } static u32 soc15_get_xclk(struct amdgpu_device *adev) { - return adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; + + if (adev->asic_type == CHIP_RAVEN) + return reference_clock / 4; + + return reference_clock; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 61fff25b4ce7dadbb789b00fca8b09d40835baa4..ecd4eba221c0a0da36f188bb7fce14b8d1116460 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -550,9 +550,9 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, return 0; kfd_gtt_no_free_chunk: - pr_debug("Allocation failed with mem_obj = %p\n", mem_obj); + pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj); mutex_unlock(&kfd->gtt_sa_lock); - kfree(mem_obj); + kfree(*mem_obj); return -ENOMEM; } diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c index a39b0343c197dc8a6aea00aa244b80dae1fde232..401c218567af9538f3f5416f9daa1850e7e202d6 100644 --- a/drivers/gpu/drm/bochs/bochs_hw.c +++ b/drivers/gpu/drm/bochs/bochs_hw.c @@ -97,10 +97,8 @@ int bochs_hw_init(struct drm_device *dev, uint32_t flags) size = min(size, mem); } - if (pci_request_region(pdev, 0, "bochs-drm") != 0) { - DRM_ERROR("Cannot request framebuffer\n"); - return -EBUSY; - } + if (pci_request_region(pdev, 0, "bochs-drm") != 0) + DRM_WARN("Cannot request framebuffer, boot fb still active?\n"); bochs->fb_map = ioremap(addr, size); if (bochs->fb_map == NULL) { diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index cc1094f9012553ccc6ad281a0958b2a09e22a0ca..96cf64d0ee82480dfe35f4056f23c36f63c4d7e4 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1348,28 +1348,34 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode) frame.colorspace = HDMI_COLORSPACE_RGB; /* Set up colorimetry */ - switch (hdmi->hdmi_data.enc_out_encoding) { - case V4L2_YCBCR_ENC_601: - if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else + if 
(!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) { + switch (hdmi->hdmi_data.enc_out_encoding) { + case V4L2_YCBCR_ENC_601: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + case V4L2_YCBCR_ENC_709: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_709; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; + break; + default: /* Carries no data */ frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + } + } else { + frame.colorimetry = HDMI_COLORIMETRY_NONE; frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; - case V4L2_YCBCR_ENC_709: - if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else - frame.colorimetry = HDMI_COLORIMETRY_ITU_709; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; - break; - default: /* Carries no data */ - frame.colorimetry = HDMI_COLORIMETRY_ITU_601; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; } frame.scan_mode = HDMI_SCAN_MODE_NONE; diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c index 2901b7944068de4fd169122a236cc94da420e95c..6858c80d2eb503db25664159994a08645659520e 100644 --- a/drivers/gpu/drm/drm_debugfs_crc.c +++ b/drivers/gpu/drm/drm_debugfs_crc.c @@ -101,8 +101,8 @@ static ssize_t crc_control_write(struct file *file, const char __user *ubuf, if (IS_ERR(source)) return PTR_ERR(source); - if (source[len] == '\n') - source[len] = '\0'; + if (source[len - 1] == '\n') + source[len - 1] = '\0'; spin_lock_irq(&crc->lock); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c635e13e92b92f699da3458ba3a6e897da91c986..fe59b05bbe7ba36ebfb564759fdb4088d5241e5f 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1019,20 +1019,9 @@ static struct drm_dp_mst_port *drm_dp_mst_get_port_ref_locked(struct drm_dp_mst_ static struct drm_dp_mst_port *drm_dp_get_validated_port_ref(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port) { struct drm_dp_mst_port *rport = NULL; - mutex_lock(&mgr->lock); - /* - * Port may or may not be 'valid' but we don't care about that when - * destroying the port and we are guaranteed that the port pointer - * will be valid until we've finished - */ - if (current_work() == &mgr->destroy_connector_work) { - kref_get(&port->kref); - rport = port; - } else if (mgr->mst_primary) { - rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, - port); - } + if (mgr->mst_primary) + rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, port); mutex_unlock(&mgr->lock); return rport; } @@ -2173,6 +2162,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms u8 buf; u32 offset = DP_DPCD_REV; + mutex_lock(&mgr->payload_lock); mutex_lock(&mgr->lock); if (mst_state == mgr->mst_state) goto out_unlock; @@ -2244,7 +2234,10 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms /* this can fail if the device is gone */ drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0); ret = 0; - 
memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload)); + memset(mgr->payloads, 0, + mgr->max_payloads * sizeof(mgr->payloads[0])); + memset(mgr->proposed_vcpis, 0, + mgr->max_payloads * sizeof(mgr->proposed_vcpis[0])); mgr->payload_mask = 0; set_bit(0, &mgr->payload_mask); mgr->vcpi_mask = 0; @@ -2252,6 +2245,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms out_unlock: mutex_unlock(&mgr->lock); + mutex_unlock(&mgr->payload_lock); if (mstb) drm_dp_put_mst_branch_device(mstb); return ret; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 252e21c7d3eb924e08b4a13abcfa5adeb7debb82..df5fe163cfec8d0000391b2c85beaecaebb33fbc 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4774,7 +4774,7 @@ static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *d struct drm_display_mode *mode; unsigned pixel_clock = (timings->pixel_clock[0] | (timings->pixel_clock[1] << 8) | - (timings->pixel_clock[2] << 16)); + (timings->pixel_clock[2] << 16)) + 1; unsigned hactive = (timings->hactive[0] | timings->hactive[1] << 8) + 1; unsigned hblank = (timings->hblank[0] | timings->hblank[1] << 8) + 1; unsigned hsync = (timings->hsync[0] | (timings->hsync[1] & 0x7f) << 8) + 1; diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 1235c9877d6f1cebb50cf5c478c2f086d9da42a0..2078d7706a67bf54183cd930b1bc5c27cbc6c967 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -46,8 +46,6 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align) { drm_dma_handle_t *dmah; - unsigned long addr; - size_t sz; /* pci_alloc_consistent only guarantees alignment to the smallest * PAGE_SIZE order which is greater than or equal to the requested size. @@ -61,22 +59,13 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali return NULL; dmah->size = size; - dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size, &dmah->busaddr, GFP_KERNEL | __GFP_COMP); + dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size, &dmah->busaddr, GFP_KERNEL); if (dmah->vaddr == NULL) { kfree(dmah); return NULL; } - memset(dmah->vaddr, 0, size); - - /* XXX - Is virt_to_page() legal for consistent mem? */ - /* Reserve */ - for (addr = (unsigned long)dmah->vaddr, sz = size; - sz > 0; addr += PAGE_SIZE, sz -= PAGE_SIZE) { - SetPageReserved(virt_to_page((void *)addr)); - } - return dmah; } @@ -89,19 +78,9 @@ EXPORT_SYMBOL(drm_pci_alloc); */ void __drm_legacy_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah) { - unsigned long addr; - size_t sz; - - if (dmah->vaddr) { - /* XXX - Is virt_to_page() legal for consistent mem? 
*/ - /* Unreserve */ - for (addr = (unsigned long)dmah->vaddr, sz = dmah->size; - sz > 0; addr += PAGE_SIZE, sz -= PAGE_SIZE) { - ClearPageReserved(virt_to_page((void *)addr)); - } + if (dmah->vaddr) dma_free_coherent(&dev->pdev->dev, dmah->size, dmah->vaddr, dmah->busaddr); - } } /** diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index ed9588f36bc9b4a6214eca1e2954f520a0ce8ef4..5fc1b41cb6c53fd62c7edfbc2be8cb7401720121 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -258,6 +258,8 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, unsigned int waitlink_offset = buffer->user_size - 16; u32 return_target, return_dwords; u32 link_target, link_dwords; + unsigned int new_flush_seq = READ_ONCE(gpu->mmu->flush_seq); + bool need_flush = gpu->flush_seq != new_flush_seq; if (drm_debug & DRM_UT_DRIVER) etnaviv_buffer_dump(gpu, buffer, 0, 0x50); @@ -270,14 +272,14 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, * need to append a mmu flush load state, followed by a new * link to this buffer - a total of four additional words. */ - if (gpu->mmu->need_flush || gpu->switch_context) { + if (need_flush || gpu->switch_context) { u32 target, extra_dwords; /* link command */ extra_dwords = 1; /* flush command */ - if (gpu->mmu->need_flush) { + if (need_flush) { if (gpu->mmu->version == ETNAVIV_IOMMU_V1) extra_dwords += 1; else @@ -290,7 +292,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords); - if (gpu->mmu->need_flush) { + if (need_flush) { /* Add the MMU flush */ if (gpu->mmu->version == ETNAVIV_IOMMU_V1) { CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU, @@ -310,7 +312,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, SYNC_RECIPIENT_PE); } - gpu->mmu->need_flush = false; + gpu->flush_seq = new_flush_seq; } if (gpu->switch_context) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index a1562f89c3d7c4e9f0933872460ed8006783f363..1f8c8e4328e45e4e35f59a45576014e3e1dd77d4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1353,7 +1353,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, gpu->active_fence = submit->fence->seqno; if (gpu->lastctx != cmdbuf->ctx) { - gpu->mmu->need_flush = true; + gpu->mmu->flush_seq++; gpu->switch_context = true; gpu->lastctx = cmdbuf->ctx; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 689cb8f3680c97bcd62836bfd90885f134862650..62b2877d090bb519c0351c1cd7405a191d133d1d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -138,6 +138,7 @@ struct etnaviv_gpu { struct etnaviv_iommu *mmu; struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc; + unsigned int flush_seq; /* Power Control: */ struct clk *clk_bus; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index f103e787de94379e127947ad1707af1f54913d67..0e23a0542f0a9170c4ed93afbf7dc4fe4648d54f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -132,7 +132,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu, */ if (mmu->last_iova) { mmu->last_iova = 0; - mmu->need_flush = true; + mmu->flush_seq++; continue; } @@ -246,7 +246,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, } 
list_add_tail(&mapping->mmu_node, &mmu->mappings); - mmu->need_flush = true; + mmu->flush_seq++; mutex_unlock(&mmu->lock); return ret; @@ -264,7 +264,7 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu, etnaviv_iommu_remove_mapping(mmu, mapping); list_del(&mapping->mmu_node); - mmu->need_flush = true; + mmu->flush_seq++; mutex_unlock(&mmu->lock); } @@ -346,7 +346,7 @@ int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr, return ret; } mmu->last_iova = vram_node->start + size; - gpu->mmu->need_flush = true; + mmu->flush_seq++; mutex_unlock(&mmu->lock); *iova = (u32)vram_node->start; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h index 54be289e5981c65a9fc2e5a0c11e49071918a21a..ccb6ad3582b82ba4fd55359e9973a3606dc94501 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -44,7 +44,7 @@ struct etnaviv_iommu { struct list_head mappings; struct drm_mm mm; u32 last_iova; - bool need_flush; + unsigned int flush_seq; }; struct etnaviv_gem_object; diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 7904ffa9abfb95ca17bf3f938b83e19c67a0c897..366c975cde5b7041bbfc2f78de6088062c7c7308 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1739,8 +1739,9 @@ static int exynos_dsi_probe(struct platform_device *pdev) ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret) { - dev_info(dev, "failed to get regulators: %d\n", ret); - return -EPROBE_DEFER; + if (ret != -EPROBE_DEFER) + dev_info(dev, "failed to get regulators: %d\n", ret); + return ret; } dsi->clks = devm_kzalloc(dev, @@ -1753,9 +1754,10 @@ static int exynos_dsi_probe(struct platform_device *pdev) dsi->clks[i] = devm_clk_get(dev, clk_names[i]); if (IS_ERR(dsi->clks[i])) { if (strcmp(clk_names[i], "sclk_mipi") == 0) { - strcpy(clk_names[i], OLD_SCLK_MIPI_CLK_NAME); - i--; - continue; + dsi->clks[i] = devm_clk_get(dev, + OLD_SCLK_MIPI_CLK_NAME); + if (!IS_ERR(dsi->clks[i])) + continue; } dev_info(dev, "failed to get the clock: %s\n", diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 2570c7f647a637f3ea5eabf4030e77888116a33b..883fc45870dd9e992f2a8d964729cd0eb676dc50 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -486,6 +486,7 @@ static int psbfb_probe(struct drm_fb_helper *helper, container_of(helper, struct psb_fbdev, psb_fb_helper); struct drm_device *dev = psb_fbdev->psb_fb_helper.dev; struct drm_psb_private *dev_priv = dev->dev_private; + unsigned int fb_size; int bytespp; bytespp = sizes->surface_bpp / 8; @@ -495,8 +496,11 @@ static int psbfb_probe(struct drm_fb_helper *helper, /* If the mode will not fit in 32bit then switch to 16bit to get a console on full resolution. 
The X mode setting server will allocate its own 32bit GEM framebuffer */ - if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height > - dev_priv->vram_stolen_size) { + fb_size = ALIGN(sizes->surface_width * bytespp, 64) * + sizes->surface_height; + fb_size = ALIGN(fb_size, PAGE_SIZE); + + if (fb_size > dev_priv->vram_stolen_size) { sizes->surface_bpp = 16; sizes->surface_depth = 16; } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 02c61a1ad56a213146f808eb506ac00dccc257d3..e9f9063dbf6339b2c68ab4e46623d3b25d0b6b1b 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -513,9 +513,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); - intel_vgpu_reset_display(vgpu); if (dmlr) { + intel_vgpu_reset_display(vgpu); intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 658b8dd45b834fb463cab8424e5067dc82258e39..3ea311d32fa9eba50d1e8c19b989b1067131b1e1 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -307,6 +307,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) { struct drm_device *drm = mtk_crtc->base.dev; + struct drm_crtc *crtc = &mtk_crtc->base; int i; DRM_DEBUG_DRIVER("%s\n", __func__); @@ -328,6 +329,13 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) mtk_disp_mutex_unprepare(mtk_crtc->mutex); pm_runtime_put(drm->dev); + + if (crtc->state->event && !crtc->state->active) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irq(&crtc->dev->event_lock); + } } static void mtk_crtc_ddp_config(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 855248132b2bd2d0a24c878b0066074e412f83ca..9fbfa9f94e6ce6aa951acc489b791bc6cef5d708 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -400,7 +400,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector) return num; } -static int dsi_mgr_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dsi_mgr_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int id = dsi_mgr_connector_get_id(connector); @@ -543,6 +543,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1); struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; + struct msm_dsi_pll *src_pll; bool is_dual_dsi = IS_DUAL_DSI(); int ret; @@ -583,6 +584,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) id, ret); } + /* Save PLL status if it is a clock source */ + src_pll = msm_dsi_phy_get_pll(msm_dsi->phy); + msm_dsi_pll_save_state(src_pll); + ret = msm_dsi_host_power_off(host); if (ret) pr_err("%s: host %d power off failed,%d\n", __func__, id, ret); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7c9bf91bc22b6dd20615cb9181bec1446e3f72be..c0a7fa56d9a74bfd3d85ebaaae4daac6f06234d6 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -613,10 +613,6 @@ void 
msm_dsi_phy_disable(struct msm_dsi_phy *phy) if (!phy || !phy->cfg->ops.disable) return; - /* Save PLL status if it is a clock source */ - if (phy->usecase != MSM_DSI_PHY_SLAVE) - msm_dsi_pll_save_state(phy->pll); - phy->cfg->ops.disable(phy); dsi_phy_regulator_disable(phy); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 44097767700124df63f8a325969ea44b494256d7..99d356b6e915159a224041c8333a4231e8574b1c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -1004,8 +1004,8 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc) ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion, msecs_to_jiffies(50)); if (ret == 0) - dev_warn(dev->dev, "pp done time out, lm=%d\n", - mdp5_cstate->pipeline.mixer->lm); + dev_warn_ratelimited(dev->dev, "pp done time out, lm=%d\n", + mdp5_cstate->pipeline.mixer->lm); } static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9a8ce342e040efd4918a4a8f9606002abe3d4aa4..ec037cd5e2346447ad347670353e662389152ad7 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -562,6 +562,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) if (ret) goto fail; + if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + switch (get_mdp_ver(pdev)) { case KMS_MDP4: kms = mdp4_kms_init(ddev); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 6f48fc1cd1d1f08c3d3d4815f05716cbcf45d15b..ec263f2b31e9a8da10acecda0abdb34da53de6e3 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -44,6 +44,46 @@ static bool use_pages(struct drm_gem_object *obj) return !msm_obj->vram_node; } +/* + * Cache sync.. this is a bit over-complicated, to fit dma-mapping + * API. Really GPU cache is out of scope here (handled on cmdstream) + * and all we need to do is invalidate newly allocated pages before + * mapping to CPU as uncached/writecombine. + * + * On top of this, we have the added headache, that depending on + * display generation, the display's iommu may be wired up to either + * the toplevel drm device (mdss), or to the mdp sub-node, meaning + * that here we either have dma-direct or iommu ops. + * + * Let this be a cautionary tail of abstraction gone wrong. 
+ */ + +static void sync_for_device(struct msm_gem_object *msm_obj) +{ + struct device *dev = msm_obj->base.dev->dev; + + if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } else { + dma_map_sg(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } +} + +static void sync_for_cpu(struct msm_gem_object *msm_obj) +{ + struct device *dev = msm_obj->base.dev->dev; + + if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } else { + dma_unmap_sg(dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + } +} + /* allocate pages from VRAM carveout, used when no IOMMU: */ static struct page **get_pages_vram(struct drm_gem_object *obj, int npages) { @@ -112,8 +152,7 @@ static struct page **get_pages(struct drm_gem_object *obj) * so we don't get ourselves into trouble with a dirty cache */ if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + sync_for_device(msm_obj); } return msm_obj->pages; @@ -136,9 +175,17 @@ static void put_pages(struct drm_gem_object *obj) struct msm_gem_object *msm_obj = to_msm_bo(obj); if (msm_obj->pages) { - if (msm_obj->sgt) + if (msm_obj->sgt) { + /* For non-cached buffers, ensure the new + * pages are clean because display controller, + * GPU, etc. are not coherent: + */ + if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) + sync_for_cpu(msm_obj); + sg_free_table(msm_obj->sgt); - kfree(msm_obj->sgt); + kfree(msm_obj->sgt); + } if (use_pages(obj)) drm_gem_put_pages(obj, msm_obj->pages, true, false); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 99e14e3e0fe4d44f1a6c51e1225db7e1af21a1bb..72532539369fb84f7ee79c4182d1100222337dd0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -158,7 +158,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify) fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); - if (nouveau_fence_update(fence->channel, fctx)) + if (nouveau_fence_update(chan, fctx)) ret = NVIF_NOTIFY_DROP; } spin_unlock_irqrestore(&fctx->lock, flags); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 0c0310498afdb99e59459b5e70873bc7ad6a5f55..cd9666583d4bd0ad8362bb86c0d79d791a888f51 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -73,6 +73,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug) if (debug > subdev->debug) return; + if (!mthd) + return; for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) { u32 base = chan->head * mthd->addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index de8b806b88fd9dfd8cc1e1b864a3974db04ffbf7..7618b2eb4fdfde85e0b92e946c1754a57ec1418f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, nent = (fuc.size / sizeof(struct gk20a_fw_av)); - pack = vzalloc((sizeof(*pack) * max_classes) + - (sizeof(*init) * (nent + 1))); + pack = vzalloc((sizeof(*pack) * (max_classes + 1)) + 
+ (sizeof(*init) * (nent + max_classes + 1))); if (!pack) { ret = -ENOMEM; goto end; } - init = (void *)(pack + max_classes); + init = (void *)(pack + max_classes + 1); - for (i = 0; i < nent; i++) { - struct gf100_gr_init *ent = &init[i]; + for (i = 0; i < nent; i++, init++) { struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i]; u32 class = av->addr & 0xffff; u32 addr = (av->addr & 0xffff0000) >> 14; if (prevclass != class) { - pack[classidx].init = ent; + if (prevclass) /* Add terminator to the method list. */ + init++; + pack[classidx].init = init; pack[classidx].type = class; prevclass = class; if (++classidx >= max_classes) { @@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, } } - ent->addr = addr; - ent->data = av->data; - ent->count = 1; - ent->pitch = 1; + init->addr = addr; + init->data = av->data; + init->count = 1; + init->pitch = 1; } *ppack = pack; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c index 30491d132d59c08cf6defd83c0169c1b83b227c7..fbd10a67c6c6a8d234e12ce9bde3e57fdedb464e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c @@ -108,6 +108,7 @@ gm20b_secboot_new(struct nvkm_device *device, int index, struct gm200_secboot *gsb; struct nvkm_acr *acr; + *psb = NULL; acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS) | BIT(NVKM_SECBOOT_FALCON_PMU)); if (IS_ERR(acr)) @@ -116,10 +117,8 @@ gm20b_secboot_new(struct nvkm_device *device, int index, acr->optional_falcons = BIT(NVKM_SECBOOT_FALCON_PMU); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); - if (!gsb) { - psb = NULL; + if (!gsb) return -ENOMEM; - } *psb = &gsb->base; ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base); diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 3eb92085114173554c3b9d28e72bd9c68dc1fb34..8334afa70b94ffe90803037c1f33b6f0a087eb9c 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -504,9 +504,10 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, return ret; ret = qxl_release_reserve_list(release, true); - if (ret) + if (ret) { + qxl_release_free(qdev, release); return ret; - + } cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_SURFACE_CMD_CREATE; cmd->flags = QXL_SURF_FLAG_KEEP_DATA; @@ -532,8 +533,8 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, /* no need to add a release to the fence for this surface bo, since it is only released when we ask to destroy the surface and it would never signal otherwise */ - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); surf->hw_surf_alloc = true; spin_lock(&qdev->surf_id_idr_lock); @@ -575,9 +576,8 @@ int qxl_hw_surface_dealloc(struct qxl_device *qdev, cmd->surface_id = id; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); - qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 573bab2221230d723852ab19f7d86f95f1050fd9..b209a25e307d57d206958b2e22010a072f982614 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -533,8 +533,8 @@ static int 
qxl_primary_apply_cursor(struct drm_plane *plane) cmd->u.set.visible = 1; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); return ret; @@ -701,8 +701,8 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, cmd->u.position.y = plane->state->crtc_y + fb->hot_y; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); if (old_cursor_bo) qxl_bo_unref(&old_cursor_bo); @@ -747,8 +747,8 @@ static void qxl_cursor_atomic_disable(struct drm_plane *plane, cmd->type = QXL_CURSOR_HIDE; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); } static int qxl_plane_prepare_fb(struct drm_plane *plane, diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c index 4d8681e84e684f58fa5aec6e1da3c44818befb0e..d009f2bc28e93d2efd4a77770b623ab99cf58d82 100644 --- a/drivers/gpu/drm/qxl/qxl_draw.c +++ b/drivers/gpu/drm/qxl/qxl_draw.c @@ -241,8 +241,8 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image, qxl_bo_physical_address(qdev, dimage->bo, 0); qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_free_palette: if (palette_bo) @@ -348,9 +348,10 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, goto out_release_backoff; rects = drawable_set_clipping(qdev, num_clips, clips_bo); - if (!rects) + if (!rects) { + ret = -EINVAL; goto out_release_backoff; - + } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->clip.type = SPICE_CLIP_TYPE_RECTS; @@ -381,8 +382,8 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, } qxl_bo_kunmap(clips_bo); - qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_release_backoff: if (ret) @@ -432,8 +433,8 @@ void qxl_draw_copyarea(struct qxl_device *qdev, drawable->u.copy_bits.src_pos.y = sy; qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_free_release: if (ret) @@ -476,8 +477,8 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); qxl_release_fence_buffer_objects(release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); out_free_release: if (ret) diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 31effed4a3c823d8509af4338258d76c6ab65764..cede17585525fd93385b0748c5a55d27b091db67 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -257,11 +257,8 @@ static int qxl_process_single_command(struct qxl_device *qdev, apply_surf_reloc(qdev, &reloc_info[i]); } + 
qxl_release_fence_buffer_objects(release); ret = qxl_push_command_ring_release(qdev, release, cmd->type, true); - if (ret) - qxl_release_backoff_reserve_list(release); - else - qxl_release_fence_buffer_objects(release); out_free_bos: out_free_release: diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 4f94b78cb46472bba2d60116139bc2aeea0a8f2b..d86110cdf085254e7d65d89c85b165a0c5057eb1 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -119,6 +119,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id); + msleep(10); + WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset, (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) | NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS))); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 68eed684dff52077b3a3dd471f610dd7486d518f..1669af08fafa075e5c7e36e1523212b4b1f990d4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -245,15 +245,23 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) if (zero_alloc) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; case ttm_bo_type_kernel: - bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, - page_flags, glob->dummy_read_page); + if (bdev->driver->ttm_tt_create2) + bo->ttm = bdev->driver->ttm_tt_create2(bo, page_flags, + glob->dummy_read_page); + else + bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, + page_flags, glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; case ttm_bo_type_sg: - bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, - page_flags | TTM_PAGE_FLAG_SG, - glob->dummy_read_page); + if (bdev->driver->ttm_tt_create2) + bo->ttm = bdev->driver->ttm_tt_create2(bo, page_flags | TTM_PAGE_FLAG_SG, + glob->dummy_read_page); + else + bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, + page_flags | TTM_PAGE_FLAG_SG, + glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) { ret = -ENOMEM; break; diff --git a/drivers/gpu/drm/virtio/Kconfig b/drivers/gpu/drm/virtio/Kconfig index 0c384d9a2b75e3e47b5c064ef76ee64f6afbaaeb..46010fc78f52d0e9510a63a8b5268bf1d650cec6 100644 --- a/drivers/gpu/drm/virtio/Kconfig +++ b/drivers/gpu/drm/virtio/Kconfig @@ -1,6 +1,6 @@ config DRM_VIRTIO_GPU tristate "Virtio GPU driver" - depends on DRM && VIRTIO && MMU + depends on DRM && VIRTIO && MMU && PCI select DRM_KMS_HELPER select DRM_TTM help diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile index 11e25e9a4c45380200fcfa0815933751e3939c0a..42949a17ff701ec10853a23e98e165e4869b704a 100644 --- a/drivers/gpu/drm/virtio/Makefile +++ b/drivers/gpu/drm/virtio/Makefile @@ -3,7 +3,7 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
-virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_drm_bus.o virtgpu_gem.o \ +virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_gem.o \ virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \ virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o \ virtgpu_ioctl.o virtgpu_prime.o virtgpu_trace_points.o diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c index ed0fcda713c3688ed3d2d209f04bd552c43d175a..0e29f1ce3f690a1764ed7e181d9bf67743733977 100644 --- a/drivers/gpu/drm/virtio/virtgpu_debugfs.c +++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c @@ -47,6 +47,8 @@ static int virtio_gpu_features(struct seq_file *m, void *data) virtio_add_bool(m, "virgl", vgdev->has_virgl_3d); virtio_add_bool(m, "edid", vgdev->has_edid); + virtio_add_bool(m, "resource blob", vgdev->has_resource_blob); + virtio_add_bool(m, "host visible", vgdev->has_host_visible); virtio_add_int(m, "cap sets", vgdev->num_capsets); virtio_add_int(m, "scanouts", vgdev->num_scanouts); return 0; diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 443cf4c9307471e71218f3907da3d560710b74f7..3922de4a4f713577a22491bccb6e97376570eb90 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -356,7 +356,7 @@ static const struct drm_mode_config_funcs virtio_gpu_mode_funcs = { .atomic_commit = drm_atomic_helper_commit, }; -int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) +void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) { int i; @@ -374,7 +374,6 @@ int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) vgdev_output_init(vgdev, i); drm_mode_config_reset(vgdev->ddev); - return 0; } void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev) diff --git a/drivers/gpu/drm/virtio/virtgpu_drm_bus.c b/drivers/gpu/drm/virtio/virtgpu_drm_bus.c deleted file mode 100644 index fe3b752cf6cdc3a8ed4785be13d4ce51afceb81d..0000000000000000000000000000000000000000 --- a/drivers/gpu/drm/virtio/virtgpu_drm_bus.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2015 Red Hat, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include - -#include "virtgpu_drv.h" - -static void virtio_pci_kick_out_firmware_fb(struct pci_dev *pci_dev) -{ - struct apertures_struct *ap; - bool primary; - - ap = alloc_apertures(1); - if (!ap) - return; - - ap->ranges[0].base = pci_resource_start(pci_dev, 0); - ap->ranges[0].size = pci_resource_len(pci_dev, 0); - - primary = pci_dev->resource[PCI_ROM_RESOURCE].flags - & IORESOURCE_ROM_SHADOW; - - drm_fb_helper_remove_conflicting_framebuffers(ap, "virtiodrmfb", primary); - - kfree(ap); -} - -int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev) -{ - struct drm_device *dev; - int ret; - - dev = drm_dev_alloc(driver, &vdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); - vdev->priv = dev; - - if (strcmp(vdev->dev.parent->bus->name, "pci") == 0) { - struct pci_dev *pdev = to_pci_dev(vdev->dev.parent); - const char *pname = dev_name(&pdev->dev); - bool vga = (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; - char unique[20]; - - DRM_INFO("pci: %s detected at %s\n", - vga ? "virtio-vga" : "virtio-gpu-pci", - pname); - dev->pdev = pdev; - if (vga) - virtio_pci_kick_out_firmware_fb(pdev); - - /* - * Normally the drm_dev_set_unique() call is done by core DRM. - * The following comment covers, why virtio cannot rely on it. - * - * Unlike the other virtual GPU drivers, virtio abstracts the - * underlying bus type by using struct virtio_device. - * - * Hence the dev_is_pci() check, used in core DRM, will fail - * and the unique returned will be the virtio_device "virtio0", - * while a "pci:..." one is required. - * - * A few other ideas were considered: - * - Extend the dev_is_pci() check [in drm_set_busid] to - * consider virtio. - * Seems like a bigger hack than what we have already. - * - * - Point drm_device::dev to the parent of the virtio_device - * Semantic changes: - * * Using the wrong device for i2c, framebuffer_alloc and - * prime import. - * Visual changes: - * * Helpers such as DRM_DEV_ERROR, dev_info, drm_printer, - * will print the wrong information. - * - * We could address the latter issues, by introducing - * drm_device::bus_dev, ... which would be used solely for this. - * - * So for the moment keep things as-is, with a bulky comment - * for the next person who feels like removing this - * drm_dev_set_unique() quirk. 
- */ - snprintf(unique, sizeof(unique), "pci:%s", pname); - ret = drm_dev_set_unique(dev, unique); - if (ret) - goto err_free; - - } - - ret = drm_dev_register(dev, 0); - if (ret) - goto err_free; - - return 0; - -err_free: - drm_dev_unref(dev); - return ret; -} diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 9f592f9353a69f0313f0815aa79b1cc03c3567b3..a58e9d52c07e111c7f36dadf95a75aa34dd93c94 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -40,21 +40,118 @@ static int virtio_gpu_modeset = -1; MODULE_PARM_DESC(modeset, "Disable/Enable modesetting"); module_param_named(modeset, virtio_gpu_modeset, int, 0400); +static void virtio_pci_kick_out_firmware_fb(struct pci_dev *pci_dev) +{ + struct apertures_struct *ap; + bool primary; + + ap = alloc_apertures(1); + if (!ap) + return; + + ap->ranges[0].base = pci_resource_start(pci_dev, 0); + ap->ranges[0].size = pci_resource_len(pci_dev, 0); + + primary = pci_dev->resource[PCI_ROM_RESOURCE].flags + & IORESOURCE_ROM_SHADOW; + + drm_fb_helper_remove_conflicting_framebuffers(ap, "virtiodrmfb", primary); + + kfree(ap); +} + +static int virtio_gpu_pci_quirk(struct drm_device *dev, struct virtio_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->dev.parent); + const char *pname = dev_name(&pdev->dev); + bool vga = (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; + char unique[20]; + + DRM_INFO("pci: %s detected at %s\n", + vga ? "virtio-vga" : "virtio-gpu-pci", + pname); + dev->pdev = pdev; + if (vga) + virtio_pci_kick_out_firmware_fb(pdev); + + /* + * Normally the drm_dev_set_unique() call is done by core DRM. + * The following comment covers, why virtio cannot rely on it. + * + * Unlike the other virtual GPU drivers, virtio abstracts the + * underlying bus type by using struct virtio_device. + * + * Hence the dev_is_pci() check, used in core DRM, will fail + * and the unique returned will be the virtio_device "virtio0", + * while a "pci:..." one is required. + * + * A few other ideas were considered: + * - Extend the dev_is_pci() check [in drm_set_busid] to + * consider virtio. + * Seems like a bigger hack than what we have already. + * + * - Point drm_device::dev to the parent of the virtio_device + * Semantic changes: + * * Using the wrong device for i2c, framebuffer_alloc and + * prime import. + * Visual changes: + * * Helpers such as DRM_DEV_ERROR, dev_info, drm_printer, + * will print the wrong information. + * + * We could address the latter issues, by introducing + * drm_device::bus_dev, ... which would be used solely for this. + * + * So for the moment keep things as-is, with a bulky comment + * for the next person who feels like removing this + * drm_dev_set_unique() quirk. 
+ */ + snprintf(unique, sizeof(unique), "pci:%s", pname); + return drm_dev_set_unique(dev, unique); +} + static int virtio_gpu_probe(struct virtio_device *vdev) { + struct drm_device *dev; + int ret; + if (vgacon_text_force() && virtio_gpu_modeset == -1) return -EINVAL; if (virtio_gpu_modeset == 0) return -EINVAL; - return drm_virtio_init(&driver, vdev); + dev = drm_dev_alloc(&driver, &vdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + vdev->priv = dev; + + if (!strcmp(vdev->dev.parent->bus->name, "pci")) { + ret = virtio_gpu_pci_quirk(dev, vdev); + if (ret) + goto err_free; + } + + ret = virtio_gpu_init(dev); + if (ret) + goto err_free; + + ret = drm_dev_register(dev, 0); + if (ret) + goto err_free; + + return 0; + +err_free: + drm_dev_unref(dev); + return ret; } static void virtio_gpu_remove(struct virtio_device *vdev) { struct drm_device *dev = vdev->priv; + drm_dev_unregister(dev); + virtio_gpu_deinit(dev); drm_put_dev(dev); } @@ -81,6 +178,8 @@ static unsigned int features[] = { VIRTIO_GPU_F_VIRGL, #endif VIRTIO_GPU_F_EDID, + VIRTIO_GPU_F_RESOURCE_BLOB, + VIRTIO_GPU_F_HOST_VISIBLE, }; static struct virtio_driver virtio_gpu_driver = { .feature_table = features, @@ -116,8 +215,6 @@ static const struct file_operations virtio_gpu_driver_fops = { static struct drm_driver driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER | DRIVER_ATOMIC, - .load = virtio_gpu_driver_load, - .unload = virtio_gpu_driver_unload, .open = virtio_gpu_driver_open, .postclose = virtio_gpu_driver_postclose, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 05936432632b66ae893b2a85ce0d578c1cd8706c..9db4d6381bf1a10b4fbf563977ebbbb610f58045 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -50,9 +50,6 @@ #define DRIVER_MINOR 1 #define DRIVER_PATCHLEVEL 0 -/* virtgpu_drm_bus.c */ -int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev); - struct virtio_gpu_object_params { uint32_t format; uint32_t width; @@ -61,6 +58,8 @@ struct virtio_gpu_object_params { bool dumb; /* 3d */ bool virgl; + bool blob; + uint32_t blob_mem; uint32_t target; uint32_t bind; uint32_t depth; @@ -74,15 +73,24 @@ struct virtio_gpu_object { struct drm_gem_object gem_base; uint32_t hw_res_handle; + bool create_callback_done; + /* These variables are only valid if create_callback_done is true */ + uint32_t num_planes; + uint64_t format_modifier; + uint32_t strides[4]; + uint32_t offsets[4]; + struct sg_table *pages; uint32_t mapped; void *vmap; bool dumb; + bool blob; struct ttm_place placement_code; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; bool created; + uint32_t blob_mem; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, gem_base) @@ -224,12 +232,22 @@ struct virtio_gpu_device { bool has_virgl_3d; bool has_edid; + bool has_resource_blob; + bool has_host_visible; struct work_struct config_changed_work; struct virtio_gpu_drv_capset *capsets; uint32_t num_capsets; struct list_head cap_cache; + + /* coherent memory */ + int cbar; + unsigned long caddr; + unsigned long csize; + + struct idr request_idr; + spinlock_t request_idr_lock; }; struct virtio_gpu_fpriv { @@ -237,15 +255,15 @@ struct virtio_gpu_fpriv { }; /* virtio_ioctl.c */ -#define DRM_VIRTIO_NUM_IOCTLS 10 +#define DRM_VIRTIO_NUM_IOCTLS 13 extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS]; int virtio_gpu_object_list_validate(struct 
ww_acquire_ctx *ticket, struct list_head *head); void virtio_gpu_unref_list(struct list_head *head); /* virtio_kms.c */ -int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags); -void virtio_gpu_driver_unload(struct drm_device *dev); +int virtio_gpu_init(struct drm_device *dev); +void virtio_gpu_deinit(struct drm_device *dev); int virtio_gpu_driver_open(struct drm_device *dev, struct drm_file *file); void virtio_gpu_driver_postclose(struct drm_device *dev, struct drm_file *file); @@ -342,11 +360,28 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, uint64_t offset, uint32_t level, struct virtio_gpu_box *box, struct virtio_gpu_fence *fence); -void +int virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, struct virtio_gpu_object_params *params, struct virtio_gpu_fence *fence); + +void +virtio_gpu_cmd_resource_create_blob(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint32_t ctx_id, uint32_t blob_mem, + uint32_t blob_flags, uint64_t blob_id, + uint64_t size, uint32_t nents, + struct virtio_gpu_mem_entry *ents); + +void virtio_gpu_cmd_map(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint64_t offset, + struct virtio_gpu_fence *fence); + +void virtio_gpu_cmd_unmap(struct virtio_gpu_device *vgdev, + uint32_t resource_id); + void virtio_gpu_ctrl_ack(struct virtqueue *vq); void virtio_gpu_cursor_ack(struct virtqueue *vq); void virtio_gpu_fence_ack(struct virtqueue *vq); @@ -359,7 +394,7 @@ int virtio_gpu_framebuffer_init(struct drm_device *dev, struct virtio_gpu_framebuffer *vgfb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); -int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); +void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); /* virtio_gpu_plane.c */ diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 402d01ed255e9a6729e531e84dfc5b96cec6b986..4235e142ff84c10e9bc097c70ecd51239f265464 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -25,10 +25,13 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include +#include + #include #include #include -#include #include "virtgpu_drv.h" @@ -258,6 +261,12 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data, case VIRTGPU_PARAM_CAPSET_QUERY_FIX: value = 1; break; + case VIRTGPU_PARAM_RESOURCE_BLOB: + value = vgdev->has_resource_blob == true ? 1 : 0; + break; + case VIRTGPU_PARAM_HOST_VISIBLE: + value = vgdev->has_host_visible == true ? 
1 : 0; + break; default: return -EINVAL; } @@ -334,9 +343,11 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, static int virtio_gpu_resource_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct virtio_gpu_device *vgdev = dev->dev_private; struct drm_virtgpu_resource_info *ri = data; struct drm_gem_object *gobj = NULL; struct virtio_gpu_object *qobj = NULL; + int ret = 0; gobj = drm_gem_object_lookup(file_priv, ri->bo_handle); if (gobj == NULL) @@ -344,10 +355,30 @@ static int virtio_gpu_resource_info_ioctl(struct drm_device *dev, void *data, qobj = gem_to_virtio_gpu_obj(gobj); - ri->size = qobj->gem_base.size; ri->res_handle = qobj->hw_res_handle; + ri->size = qobj->gem_base.size; + + if (!qobj->create_callback_done) { + ret = wait_event_interruptible(vgdev->resp_wq, + qobj->create_callback_done); + if (ret) + goto out; + } + + if (qobj->num_planes) { + int i; + + ri->num_planes = qobj->num_planes; + for (i = 0; i < qobj->num_planes; i++) { + ri->strides[i] = qobj->strides[i]; + ri->offsets[i] = qobj->offsets[i]; + } + } + + ri->format_modifier = qobj->format_modifier; +out: drm_gem_object_put_unlocked(gobj); - return 0; + return ret; } static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev, @@ -550,6 +581,134 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, return 0; } +static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + uint32_t device_blob_mem = 0; + int ret, si, nents; + uint32_t handle = 0; + struct scatterlist *sg; + struct virtio_gpu_object *obj; + struct virtio_gpu_fence *fence; + struct virtio_gpu_mem_entry *ents; + struct drm_virtgpu_resource_create_blob *rc_blob = data; + struct virtio_gpu_object_params params = { 0 }; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_fpriv *vfpriv = file->driver_priv; + bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + bool mappable = rc_blob->blob_flags & VIRTGPU_BLOB_FLAG_MAPPABLE; + bool has_guest = (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_GUEST || + rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST); + + params.size = rc_blob->size; + params.blob_mem = rc_blob->blob_mem; + params.blob = true; + + if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_GUEST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_GUEST; + + if (vgdev->has_virgl_3d) { + if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOST3D; + else if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOST3D_GUEST; + } else { + if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOSTSYS; + else if (rc_blob->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST) + device_blob_mem = VIRTIO_GPU_BLOB_MEM_HOSTSYS_GUEST; + } + + if (rc_blob->cmd_size) { + void *buf; + void __user *cmd = u64_to_user_ptr(rc_blob->cmd); + + buf = kzalloc(rc_blob->cmd_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, cmd, rc_blob->cmd_size)) { + kfree(buf); + return -EFAULT; + } + + virtio_gpu_cmd_submit(vgdev, buf, rc_blob->cmd_size, + vfpriv->ctx_id, NULL); + } + + obj = virtio_gpu_alloc_object(dev, ¶ms, NULL); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (!obj->pages) { + ret = virtio_gpu_object_get_sg_table(vgdev, obj); + if (ret) + goto err_free_obj; + } + + if (!has_guest) { + nents = 0; + } else if (use_dma_api) { + obj->mapped = dma_map_sg(vgdev->vdev->dev.parent, + obj->pages->sgl, obj->pages->nents, + 
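/* Guest-backed blobs are DMA-mapped here so the host receives bus
 * addresses; nents then counts however many segments the mapping
 * actually produced, which is what gets packed into ents below. */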
DMA_TO_DEVICE); + nents = obj->mapped; + } else { + nents = obj->pages->nents; + } + + ents = kzalloc(nents * sizeof(struct virtio_gpu_mem_entry), GFP_KERNEL); + if (has_guest) { + for_each_sg(obj->pages->sgl, sg, nents, si) { + ents[si].addr = cpu_to_le64(use_dma_api + ? sg_dma_address(sg) + : sg_phys(sg)); + ents[si].length = cpu_to_le32(sg->length); + ents[si].padding = 0; + } + } + + fence = virtio_gpu_fence_alloc(vgdev); + if (!fence) { + ret = -ENOMEM; + goto err_free_obj; + } + + virtio_gpu_cmd_resource_create_blob(vgdev, obj, vfpriv->ctx_id, + device_blob_mem, + rc_blob->blob_flags, + rc_blob->blob_id, + rc_blob->size, + nents, ents); + + ret = drm_gem_handle_create(file, &obj->gem_base, &handle); + if (ret) + goto err_fence_put; + + if (!has_guest && mappable) + virtio_gpu_cmd_map(vgdev, obj, obj->tbo.offset, fence); + + /* + * No need to call virtio_gpu_object_reserve since the buffer is not + * being used for ttm validation and no other processes can access + * the reservation object at this point. + */ + reservation_object_add_excl_fence(obj->tbo.resv, &fence->f); + + dma_fence_put(&fence->f); + drm_gem_object_put_unlocked(&obj->gem_base); + + rc_blob->res_handle = obj->hw_res_handle; + rc_blob->bo_handle = handle; + return 0; + +err_fence_put: + dma_fence_put(&fence->f); +err_free_obj: + drm_gem_object_release(&obj->gem_base); + return ret; +} + struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = { DRM_IOCTL_DEF_DRV(VIRTGPU_MAP, virtio_gpu_map_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), @@ -582,4 +741,8 @@ struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = { DRM_IOCTL_DEF_DRV(VIRTGPU_GET_CAPS, virtio_gpu_get_caps_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), + + DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_CREATE_BLOB, + virtio_gpu_resource_create_blob_ioctl, + DRM_RENDER_ALLOW) }; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index d0f54b81eabaaa149c79b67d113d90b58b801185..b96a19005fb54a9c1198ff4307b32f5edb8b80dc 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -23,6 +23,7 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include #include #include #include @@ -127,7 +128,7 @@ static void virtio_gpu_get_capsets(struct virtio_gpu_device *vgdev, vgdev->num_capsets = num_capsets; } -int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) +int virtio_gpu_init(struct drm_device *dev) { static vq_callback_t *callbacks[] = { virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack @@ -157,6 +158,8 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) idr_init(&vgdev->ctx_id_idr); spin_lock_init(&vgdev->resource_idr_lock); idr_init(&vgdev->resource_idr); + spin_lock_init(&vgdev->request_idr_lock); + idr_init(&vgdev->request_idr); init_waitqueue_head(&vgdev->resp_wq); virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func); virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func); @@ -181,6 +184,32 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) DRM_INFO("EDID support available.\n"); } + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_BLOB)) { + vgdev->cbar = 4; + vgdev->caddr = pci_resource_start(dev->pdev, vgdev->cbar); + vgdev->csize = pci_resource_len(dev->pdev, vgdev->cbar); + ret = pci_request_region( + dev->pdev, + vgdev->cbar, + "virtio-gpu-coherent"); + if (ret != 0) { + DRM_WARN("Cannot request coherent memory bar\n"); + } else { + DRM_INFO("coherent host resources enabled\n"); + DRM_INFO( + "using %s bar %d, at 0x%lx, size %ld MB\n", + dev_name(&dev->pdev->dev), + vgdev->cbar, + vgdev->caddr, + vgdev->csize >> 20); + vgdev->has_host_visible = true; + } + + vgdev->has_resource_blob = true; + DRM_INFO("resource_v2: %u, host visible %u\n", + vgdev->has_resource_blob, vgdev->has_host_visible); + } + ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL); if (ret) { DRM_ERROR("failed to find virt queues\n"); @@ -216,9 +245,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) num_capsets, &num_capsets); DRM_INFO("number of cap sets: %d\n", num_capsets); - ret = virtio_gpu_modeset_init(vgdev); - if (ret) - goto err_modeset; + virtio_gpu_modeset_init(vgdev); virtio_device_ready(vgdev->vdev); vgdev->vqs_ready = true; @@ -235,7 +262,6 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) return 0; -err_modeset: err_scanouts: virtio_gpu_ttm_fini(vgdev); err_ttm: @@ -257,7 +283,7 @@ static void virtio_gpu_cleanup_cap_cache(struct virtio_gpu_device *vgdev) } } -void virtio_gpu_driver_unload(struct drm_device *dev) +void virtio_gpu_deinit(struct drm_device *dev) { struct virtio_gpu_device *vgdev = dev->dev_private; diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index e88c00367782e509c035962f1aff775c62b769f6..e442928d2848b3e829c95d1fc6bc8381631d8928 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -23,33 +23,56 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include #include "virtgpu_drv.h" +#include + +static int virtio_gpu_virglrenderer_workaround = 1; +module_param_named(virglhack, virtio_gpu_virglrenderer_workaround, int, 0400); static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, uint32_t *resid) { - int handle; + if (virtio_gpu_virglrenderer_workaround) { + /* + * Hack to avoid re-using resource IDs. + * + * virglrenderer versions up to (and including) 0.7.0 + * can't deal with that. virglrenderer commit + * "f91a9dd35715 Fix unlinking resources from hash + * table." (Feb 2019) fixes the bug. 
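+	 * One consequence of the workaround: the seqno path never
+	 * recycles an ID, while the IDR path below does, so IDs
+	 * simply climb monotonically through the 32-bit space.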
+ */ + static atomic_t seqno = ATOMIC_INIT(0); + int handle = atomic_inc_return(&seqno); + *resid = handle; + } else { + int handle; - idr_preload(GFP_KERNEL); - spin_lock(&vgdev->resource_idr_lock); - handle = idr_alloc(&vgdev->resource_idr, NULL, 1, 0, GFP_NOWAIT); - spin_unlock(&vgdev->resource_idr_lock); - idr_preload_end(); + idr_preload(GFP_KERNEL); + spin_lock(&vgdev->resource_idr_lock); + handle = idr_alloc(&vgdev->resource_idr, NULL, 1, 0, + GFP_NOWAIT); + spin_unlock(&vgdev->resource_idr_lock); + idr_preload_end(); - if (handle < 0) - return handle; + if (handle < 0) + return handle; - *resid = handle; + *resid = handle; + } return 0; } static void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id) { - spin_lock(&vgdev->resource_idr_lock); - idr_remove(&vgdev->resource_idr, id); - spin_unlock(&vgdev->resource_idr_lock); + if (!virtio_gpu_virglrenderer_workaround) { + spin_lock(&vgdev->resource_idr_lock); + idr_remove(&vgdev->resource_idr, id); + spin_unlock(&vgdev->resource_idr_lock); + } } static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) @@ -71,17 +94,49 @@ static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) kfree(bo); } +// define internally for testing purposes +#define VIRTGPU_BLOB_MEM_CACHE_MASK 0xf000 +#define VIRTGPU_BLOB_MEM_CACHE_CACHED 0x1000 +#define VIRTGPU_BLOB_MEM_CACHE_UNCACHED 0x2000 +#define VIRTGPU_BLOB_MEM_CACHE_WC 0x3000 + static void virtio_gpu_init_ttm_placement(struct virtio_gpu_object *vgbo) { u32 c = 1; + u32 ttm_caching_flags = 0; + + u32 cache_type = (vgbo->blob_mem & VIRTGPU_BLOB_MEM_CACHE_MASK); + bool has_guest = (vgbo->blob_mem == VIRTGPU_BLOB_MEM_GUEST || + vgbo->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST); vgbo->placement.placement = &vgbo->placement_code; vgbo->placement.busy_placement = &vgbo->placement_code; vgbo->placement_code.fpfn = 0; vgbo->placement_code.lpfn = 0; - vgbo->placement_code.flags = - TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | - TTM_PL_FLAG_NO_EVICT; + + switch (cache_type) { + case VIRTGPU_BLOB_MEM_CACHE_CACHED: + ttm_caching_flags = TTM_PL_FLAG_CACHED; + break; + case VIRTGPU_BLOB_MEM_CACHE_WC: + ttm_caching_flags = TTM_PL_FLAG_WC; + break; + case VIRTGPU_BLOB_MEM_CACHE_UNCACHED: + ttm_caching_flags = TTM_PL_FLAG_UNCACHED; + break; + default: + ttm_caching_flags = TTM_PL_MASK_CACHING; + } + + if (!has_guest && vgbo->blob) { + vgbo->placement_code.flags = + ttm_caching_flags | TTM_PL_FLAG_VRAM | + TTM_PL_FLAG_NO_EVICT; + } else { + vgbo->placement_code.flags = + TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | + TTM_PL_FLAG_NO_EVICT; + } vgbo->placement.num_placement = c; vgbo->placement.num_busy_placement = c; @@ -117,10 +172,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, return ret; } bo->dumb = params->dumb; + bo->blob = params->blob; + bo->blob_mem = params->blob_mem; if (params->virgl) { virtio_gpu_cmd_resource_create_3d(vgdev, bo, params, fence); - } else { + } else if (params->dumb) { virtio_gpu_cmd_create_resource(vgdev, bo, params, fence); } diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index aff33b9b47dab2b3eaa14600e38595bbe24daf14..6c1f02b4f17c5504d1cc9ca7b0756a3ef20ebc89 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -185,6 +185,12 @@ static int virtio_gpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; break; + case TTM_PL_VRAM: + man->func = &ttm_bo_manager_func; + 
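/*
 * The virtgpu_object.c hunk above derives TTM caching flags from the
 * cache hint packed into blob_mem and routes host-only blobs to this
 * VRAM-style placement (the coherent PCI bar) rather than guest pages.
 * A compact sketch of that flag selection follows; the constants are
 * stand-ins for the VIRTGPU_BLOB_MEM_CACHE_* values defined above.
 */
#include <stdint.h>
#include <stdio.h>

#define CACHE_MASK	0xf000u
#define CACHE_CACHED	0x1000u
#define CACHE_UNCACHED	0x2000u
#define CACHE_WC	0x3000u

static const char *caching_for(uint32_t blob_mem)
{
	switch (blob_mem & CACHE_MASK) {
	case CACHE_CACHED:
		return "cached";
	case CACHE_WC:
		return "write-combined";
	case CACHE_UNCACHED:
		return "uncached";
	default:
		return "any (mask all caching modes)";
	}
}

int main(void)
{
	printf("%s\n", caching_for(0x3000));	/* write-combined */
	return 0;
}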
man->flags = TTM_MEMTYPE_FLAG_MAPPABLE; + man->available_caching = TTM_PL_MASK_CACHING; + man->default_caching = TTM_PL_FLAG_CACHED; + break; default: DRM_ERROR("Unsupported memory type %u\n", (unsigned int)type); return -EINVAL; @@ -216,6 +222,7 @@ static int virtio_gpu_verify_access(struct ttm_buffer_object *bo, static int virtio_gpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) { + struct virtio_gpu_device *vgdev = virtio_gpu_get_vgdev(bdev); struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; mem->bus.addr = NULL; @@ -229,8 +236,18 @@ static int virtio_gpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, case TTM_PL_SYSTEM: case TTM_PL_TT: /* system memory */ + mem->bus.offset = 0; + mem->bus.base = 0; + mem->bus.is_iomem = false; + return 0; + case TTM_PL_VRAM: + /* coherent memory (pci bar) */ + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.base = vgdev->caddr; + mem->bus.is_iomem = true; return 0; default: + DRM_ERROR("Unsupported memory type %u\n", mem->mem_type); return -EINVAL; } return 0; @@ -246,33 +263,54 @@ static void virtio_gpu_ttm_io_mem_free(struct ttm_bo_device *bdev, */ struct virtio_gpu_ttm_tt { struct ttm_dma_tt ttm; - struct virtio_gpu_device *vgdev; - u64 offset; + struct virtio_gpu_object *obj; }; +static int virtio_gpu_ttm_vram_bind(struct ttm_tt *ttm, + struct ttm_mem_reg *bo_mem) +{ + return 0; +} + +static int virtio_gpu_ttm_vram_unbind(struct ttm_tt *ttm) +{ + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); + struct virtio_gpu_device *vgdev = + virtio_gpu_get_vgdev(gtt->obj->tbo.bdev); + struct virtio_gpu_object *obj = gtt->obj; + + virtio_gpu_cmd_unmap(vgdev, obj->hw_res_handle); + return 0; +} + static int virtio_gpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { - struct virtio_gpu_ttm_tt *gtt = (void *)ttm; - - gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); - if (!ttm->num_pages) - WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", - ttm->num_pages, bo_mem, ttm); + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); + struct virtio_gpu_device *vgdev = + virtio_gpu_get_vgdev(gtt->obj->tbo.bdev); - /* Not implemented */ + virtio_gpu_object_attach(vgdev, gtt->obj, NULL); return 0; } static int virtio_gpu_ttm_backend_unbind(struct ttm_tt *ttm) { - /* Not implemented */ + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); + struct virtio_gpu_device *vgdev = + virtio_gpu_get_vgdev(gtt->obj->tbo.bdev); + + virtio_gpu_object_detach(vgdev, gtt->obj); return 0; } -static void virtio_gpu_ttm_backend_destroy(struct ttm_tt *ttm) +static void virtio_gpu_ttm_tt_destroy(struct ttm_tt *ttm) { - struct virtio_gpu_ttm_tt *gtt = (void *)ttm; + struct virtio_gpu_ttm_tt *gtt = + container_of(ttm, struct virtio_gpu_ttm_tt, ttm.ttm); ttm_dma_tt_fini(>t->ttm); kfree(gtt); @@ -281,7 +319,13 @@ static void virtio_gpu_ttm_backend_destroy(struct ttm_tt *ttm) static struct ttm_backend_func virtio_gpu_backend_func = { .bind = &virtio_gpu_ttm_backend_bind, .unbind = &virtio_gpu_ttm_backend_unbind, - .destroy = &virtio_gpu_ttm_backend_destroy, + .destroy = &virtio_gpu_ttm_tt_destroy, +}; + +static struct ttm_backend_func virtio_gpu_vram_func = { + .bind = &virtio_gpu_ttm_vram_bind, + .unbind = &virtio_gpu_ttm_vram_unbind, + .destroy = &virtio_gpu_ttm_tt_destroy, }; static int virtio_gpu_ttm_tt_populate(struct ttm_tt *ttm) @@ -297,72 +341,43 @@ static void virtio_gpu_ttm_tt_unpopulate(struct 
ttm_tt *ttm) ttm_pool_unpopulate(ttm); } -static struct ttm_tt *virtio_gpu_ttm_tt_create(struct ttm_bo_device *bdev, - unsigned long size, - uint32_t page_flags, - struct page *dummy_read_page) +static struct ttm_tt *virtio_gpu_ttm_tt_create2(struct ttm_buffer_object *bo, + uint32_t page_flags, + struct page *dummy_read_page) { + unsigned long size = bo->num_pages << PAGE_SHIFT; struct virtio_gpu_device *vgdev; + struct virtio_gpu_object *obj; struct virtio_gpu_ttm_tt *gtt; + uint32_t has_guest; + + vgdev = virtio_gpu_get_vgdev(bo->bdev); + obj = container_of(bo, struct virtio_gpu_object, tbo); - vgdev = virtio_gpu_get_vgdev(bdev); gtt = kzalloc(sizeof(struct virtio_gpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) return NULL; - gtt->ttm.ttm.func = &virtio_gpu_backend_func; - gtt->vgdev = vgdev; - if (ttm_dma_tt_init(>t->ttm, bdev, size, page_flags, - dummy_read_page)) { - kfree(gtt); - return NULL; - } - return >t->ttm.ttm; -} - -static void virtio_gpu_move_null(struct ttm_buffer_object *bo, - struct ttm_mem_reg *new_mem) -{ - struct ttm_mem_reg *old_mem = &bo->mem; - - BUG_ON(old_mem->mm_node != NULL); - *old_mem = *new_mem; - new_mem->mm_node = NULL; -} - -static int virtio_gpu_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) -{ - int ret; - - ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); - if (ret) - return ret; - - virtio_gpu_move_null(bo, new_mem); - return 0; -} - -static void virtio_gpu_bo_move_notify(struct ttm_buffer_object *tbo, - bool evict, - struct ttm_mem_reg *new_mem) -{ - struct virtio_gpu_object *bo; - struct virtio_gpu_device *vgdev; - - bo = container_of(tbo, struct virtio_gpu_object, tbo); - vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private; - - if (!new_mem || (new_mem->placement & TTM_PL_FLAG_SYSTEM)) { - if (bo->hw_res_handle) - virtio_gpu_object_detach(vgdev, bo); - - } else if (new_mem->placement & TTM_PL_FLAG_TT) { - if (bo->hw_res_handle) { - virtio_gpu_object_attach(vgdev, bo, NULL); + gtt->obj = obj; + has_guest = (obj->blob_mem == VIRTGPU_BLOB_MEM_GUEST || + obj->blob_mem == VIRTGPU_BLOB_MEM_HOST_GUEST); + + if (!has_guest && obj->blob) { + gtt->ttm.ttm.func = &virtio_gpu_vram_func; + if (ttm_tt_init(>t->ttm.ttm, bo->bdev, size, page_flags, + dummy_read_page)) { + kfree(gtt); + return NULL; + } + } else { + gtt->ttm.ttm.func = &virtio_gpu_backend_func; + if (ttm_dma_tt_init(>t->ttm, bo->bdev, size, page_flags, + dummy_read_page)) { + kfree(gtt); + return NULL; } } + + return >t->ttm.ttm; } static void virtio_gpu_bo_swap_notify(struct ttm_buffer_object *tbo) @@ -376,19 +391,17 @@ static void virtio_gpu_bo_swap_notify(struct ttm_buffer_object *tbo) } static struct ttm_bo_driver virtio_gpu_bo_driver = { - .ttm_tt_create = &virtio_gpu_ttm_tt_create, + .ttm_tt_create2 = &virtio_gpu_ttm_tt_create2, .ttm_tt_populate = &virtio_gpu_ttm_tt_populate, .ttm_tt_unpopulate = &virtio_gpu_ttm_tt_unpopulate, .invalidate_caches = &virtio_gpu_invalidate_caches, .init_mem_type = &virtio_gpu_init_mem_type, .eviction_valuable = ttm_bo_eviction_valuable, .evict_flags = &virtio_gpu_evict_flags, - .move = &virtio_gpu_bo_move, .verify_access = &virtio_gpu_verify_access, .io_mem_reserve = &virtio_gpu_ttm_io_mem_reserve, .io_mem_free = &virtio_gpu_ttm_io_mem_free, .io_mem_pfn = ttm_bo_default_io_mem_pfn, - .move_notify = &virtio_gpu_bo_move_notify, .swap_notify = &virtio_gpu_bo_swap_notify, }; @@ -415,6 +428,15 @@ int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev) DRM_ERROR("Failed initializing 
GTT heap.\n"); goto err_mm_init; } + + if (vgdev->has_host_visible) { + r = ttm_bo_init_mm(&vgdev->mman.bdev, TTM_PL_VRAM, + vgdev->csize >> PAGE_SHIFT); + if (r) { + DRM_ERROR("Failed initializing VRAM heap.\n"); + goto err_mm_init; + } + } return 0; err_mm_init: diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index feddc29b3f2c163c8132e6f7e219ef1f9676ed7e..420db70018dc89346ecdb12b48bac0ee4bdb46f7 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -856,7 +856,45 @@ void virtio_gpu_cmd_context_detach_resource(struct virtio_gpu_device *vgdev, virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); } -void +static void virtio_gpu_cmd_resource_create_cb(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + struct virtio_gpu_resp_resource_plane_info *resp = + (struct virtio_gpu_resp_resource_plane_info *)vbuf->resp_buf; + struct virtio_gpu_object *obj = + (struct virtio_gpu_object *)vbuf->data_buf; + uint32_t resp_type = le32_to_cpu(resp->hdr.type); + int i; + + /* + * Keeps the data_buf, which points to this virtio_gpu_object, from + * getting kfree'd after this cb returns. + */ + vbuf->data_buf = NULL; + + switch (resp_type) { + case VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO: + case VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO_LEGACY: + break; + default: + goto finish_pending; + } + + obj->num_planes = le32_to_cpu(resp->num_planes); + obj->format_modifier = le64_to_cpu(resp->format_modifier); + + for (i = 0; i < obj->num_planes; i++) { + obj->strides[i] = le32_to_cpu(resp->strides[i]); + obj->offsets[i] = le32_to_cpu(resp->offsets[i]); + } + +finish_pending: + obj->create_callback_done = true; + drm_gem_object_put_unlocked(&obj->gem_base); + wake_up_all(&vgdev->resp_wq); +} + +int virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo, struct virtio_gpu_object_params *params, @@ -864,8 +902,15 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, { struct virtio_gpu_resource_create_3d *cmd_p; struct virtio_gpu_vbuffer *vbuf; + struct virtio_gpu_resp_resource_plane_info *resp_buf; - cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + resp_buf = kzalloc(sizeof(*resp_buf), GFP_KERNEL); + if (!resp_buf) + return -ENOMEM; + + cmd_p = virtio_gpu_alloc_cmd_resp(vgdev, + virtio_gpu_cmd_resource_create_cb, &vbuf, sizeof(*cmd_p), + sizeof(struct virtio_gpu_resp_resource_plane_info), resp_buf); memset(cmd_p, 0, sizeof(*cmd_p)); cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D); @@ -882,8 +927,15 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, cmd_p->nr_samples = cpu_to_le32(params->nr_samples); cmd_p->flags = cpu_to_le32(params->flags); + /* Reuse the data_buf pointer for the object pointer. 
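The completion callback recovers the object from vbuf->data_buf and is
responsible for dropping the reference taken just below.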
*/ + vbuf->data_buf = bo; + bo->create_callback_done = false; + drm_gem_object_get(&bo->gem_base); + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); bo->created = true; + + return 0; } void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, @@ -966,6 +1018,9 @@ int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev, struct scatterlist *sg; int si, nents; + if (obj->blob) + return 0; + if (WARN_ON_ONCE(!obj->created)) return -EINVAL; @@ -1041,3 +1096,83 @@ void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev, memcpy(cur_p, &output->cursor, sizeof(output->cursor)); virtio_gpu_queue_cursor(vgdev, vbuf); } + +static void virtio_gpu_cmd_resource_map_cb(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + /* + * No-op for v5.4. + */ +} + +void virtio_gpu_cmd_map(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint64_t offset, + struct virtio_gpu_fence *fence) +{ + struct virtio_gpu_resource_map *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + struct virtio_gpu_resp_map_info *resp_buf; + + resp_buf = kzalloc(sizeof(*resp_buf), GFP_KERNEL); + if (!resp_buf) { + DRM_ERROR("allocation failure\n"); + return; + } + + cmd_p = virtio_gpu_alloc_cmd_resp(vgdev, + virtio_gpu_cmd_resource_map_cb, &vbuf, sizeof(*cmd_p), + sizeof(struct virtio_gpu_resp_map_info), resp_buf); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_MAP); + cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle); + cmd_p->offset = offset; + + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, &cmd_p->hdr, fence); +} + +void virtio_gpu_cmd_unmap(struct virtio_gpu_device *vgdev, + uint32_t resource_id) +{ + struct virtio_gpu_resource_unmap *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_UNMAP); + cmd_p->resource_id = cpu_to_le32(resource_id); + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +void +virtio_gpu_cmd_resource_create_blob(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *bo, + uint32_t ctx_id, uint32_t blob_mem, + uint32_t blob_flags, uint64_t blob_id, + uint64_t size, uint32_t nents, + struct virtio_gpu_mem_entry *ents) +{ + struct virtio_gpu_resource_create_blob *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB); + cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id); + cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle); + cmd_p->blob_mem = cpu_to_le32(blob_mem); + cmd_p->blob_flags = cpu_to_le32(blob_flags); + cmd_p->blob_id = cpu_to_le64(blob_id); + cmd_p->size = cpu_to_le64(size); + cmd_p->nr_entries = cpu_to_le32(nents); + + vbuf->data_buf = ents; + vbuf->data_size = sizeof(*ents) * nents; + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); + bo->created = true; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 36c7b6c839c0dd230752cba96c348b40e2c65c65..738ad2fc79a258a3719fb972b92cedeedda7cd0b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -210,8 +210,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, cres->hash.key = user_key | (res_type << 24); ret = drm_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) + if (unlikely(ret != 0)) { + 
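/* The hash insert failed and nothing else holds cres yet, so it must
 * be freed on this path; previously the error path leaked the
 * allocation. */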
kfree(cres); goto out_invalid_key; + } cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); diff --git a/drivers/gpu/trace/Kconfig b/drivers/gpu/trace/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..c24e9edd022e68c73e1a9ddc1ff600ef54646dbc --- /dev/null +++ b/drivers/gpu/trace/Kconfig @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config TRACE_GPU_MEM + bool diff --git a/drivers/gpu/trace/Makefile b/drivers/gpu/trace/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b70fbdc5847f002a70df7a57d9d0da9c518bb86e --- /dev/null +++ b/drivers/gpu/trace/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_TRACE_GPU_MEM) += trace_gpu_mem.o diff --git a/drivers/gpu/trace/trace_gpu_mem.c b/drivers/gpu/trace/trace_gpu_mem.c new file mode 100644 index 0000000000000000000000000000000000000000..01e855897b6d093d4cd84cc1df13194121baec4d --- /dev/null +++ b/drivers/gpu/trace/trace_gpu_mem.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPU memory trace points + * + * Copyright (C) 2020 Google, Inc. + */ + +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(gpu_mem_total); diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index d0a81a03ddbdd5cb206db6760a2954feaff0de41..8ab8f2350bbcdc7f28cccfc15d195807c503c32f 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -343,7 +343,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_CUSTOM | 0x0003) || - usage->hid == (HID_UP_MSVENDOR | 0x0003)) { + usage->hid == (HID_UP_MSVENDOR | 0x0003) || + usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 157adae2facb184d43e3e03d2790b14f46b658d2..1f2e9c9279e86d85710b35130c6ee4fcf5270ddc 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1567,7 +1567,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, rsize = ((report->size - 1) >> 3) + 1; - if (rsize > HID_MAX_BUFFER_SIZE) + if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) + rsize = HID_MAX_BUFFER_SIZE - 1; + else if (rsize > HID_MAX_BUFFER_SIZE) rsize = HID_MAX_BUFFER_SIZE; if (csize < rsize) { diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 2ce1eb0c9212531814055177e6820fcdc18eb947..f2e23f81601e7472fe2a3d8f2e26cc7432b50dbe 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -44,8 +44,9 @@ static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. 
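Matching on BUS_USB with HID_GROUP_GENERIC below keeps this driver on the
keyboard interface only, leaving the touchpad for hid-multitouch.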
*/ - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 10af8585c820d253b71301f23efae5db3b40fc14..95052373a828240809c25581b944f1feb2587944 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -341,6 +341,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Trekstor SURFBOOK E11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index bccd97cdc53f9fc5bb8052a978b73f7035717e1e..d9602f3a359e158e17f2a451a71057561156ec4f 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -954,9 +954,9 @@ void hiddev_disconnect(struct hid_device *hid) hiddev->exist = 0; if (hiddev->open) { - mutex_unlock(&hiddev->existancelock); hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); + mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); kfree(hiddev); diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index 19f2a6d48bac99d7232851ba27c8d121ab4007e5..bdd7679fd29873c61f3118b4d6f91091927e50b5 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -426,7 +426,7 @@ static int ADT7462_REG_VOLT(struct adt7462_data *data, int which) return 0x95; break; } - return -ENODEV; + return 0; } /* Provide labels for sysfs */ diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index e5234f953a6d16213920df252ac8ef23d857f924..b6e5aaa54963d6b30e35f2db363cf376176f5483 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -527,7 +527,7 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) } data->config = config; - hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, + hwmon_dev = devm_hwmon_device_register_with_info(dev, "jc42", data, &jc42_chip_info, NULL); return PTR_ERR_OR_ZERO(hwmon_dev); diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index 58b789c28b483f062b17822a32487be6bdba14ee..94eea2ac62519625024a5bb0f4340304f65fc8c9 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -89,8 +89,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882, #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */ -#define LTC_NOT_BUSY BIT(5) -#define LTC_NOT_PENDING BIT(4) +#define LTC_NOT_BUSY BIT(6) +#define LTC_NOT_PENDING BIT(5) /* * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 6ebf6a2edb3325bbee48bd0c6e4a29c83600514e..ca2717137ad22d0f6b9eb47dea5dc408d5024b22 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -499,7 +499,7 @@ static int msc_configure(struct msc *msc) lockdep_assert_held(&msc->buf_mutex); if (msc->mode > MSC_MODE_MULTI) - return -ENOTSUPP; + return -EINVAL; if (msc->mode == MSC_MODE_MULTI) msc_buffer_clear_hw_header(msc); @@ -950,7 +950,7 @@ static int msc_buffer_alloc(struct msc 
*msc, unsigned long *nr_pages, } else if (msc->mode == MSC_MODE_MULTI) { ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins); } else { - ret = -ENOTSUPP; + ret = -EINVAL; } if (!ret) { @@ -1173,7 +1173,7 @@ static ssize_t intel_th_msc_read(struct file *file, char __user *buf, if (ret >= 0) *ppos = iter->offset; } else { - ret = -ENOTSUPP; + ret = -EINVAL; } put_count: diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index fc371444407d43d537acda150501b4d597d4b0d5..b8cbd26b60e188b9db7ff7f81b6c527aaba4d360 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -218,6 +218,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Elkhart Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4529), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Elkhart Lake */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26), diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index f5e1941e65b5a2dd6cebfd8a2f60798e258b986b..8915ee30a5b44ee3dc7ad7019d7dc0c818c1daf1 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -182,7 +182,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev) /* SCL Low Time */ writel(t_low, idev->base + ALTR_I2C_SCL_LOW); /* SDA Hold Time, 300ns */ - writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD); + writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD); /* Mask all master interrupt bits */ altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false); @@ -395,7 +395,6 @@ static int altr_i2c_probe(struct platform_device *pdev) struct altr_i2c_dev *idev = NULL; struct resource *res; int irq, ret; - u32 val; idev = devm_kzalloc(&pdev->dev, sizeof(*idev), GFP_KERNEL); if (!idev) @@ -422,17 +421,17 @@ static int altr_i2c_probe(struct platform_device *pdev) init_completion(&idev->msg_complete); spin_lock_init(&idev->lock); - val = device_property_read_u32(idev->dev, "fifo-size", + ret = device_property_read_u32(idev->dev, "fifo-size", &idev->fifo_size); - if (val) { + if (ret) { dev_err(&pdev->dev, "FIFO size set to default of %d\n", ALTR_I2C_DFLT_FIFO_SZ); idev->fifo_size = ALTR_I2C_DFLT_FIFO_SZ; } - val = device_property_read_u32(idev->dev, "clock-frequency", + ret = device_property_read_u32(idev->dev, "clock-frequency", &idev->bus_clk_rate); - if (val) { + if (ret) { dev_err(&pdev->dev, "Default to 100kHz\n"); idev->bus_clk_rate = 100000; /* default clock rate */ } diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c index bb68957d3da5e6846169ae1b26bfd37f9e6d1caa..aa5c55bd8b112ba1a852e5377153e736c33f6226 100644 --- a/drivers/i2c/busses/i2c-hix5hd2.c +++ b/drivers/i2c/busses/i2c-hix5hd2.c @@ -498,6 +498,7 @@ static int hix5hd2_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&priv->adap); pm_runtime_disable(priv->dev); pm_runtime_set_suspended(priv->dev); + clk_disable_unprepare(priv->clk); return 0; } diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 30132c3957cdc3d2d76e03a5a82511d9b0a27829..41ca9ff7b5da75a7b418f61c7a87f028c28403c6 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -82,25 +82,6 @@ #define JZ4780_I2C_STA_TFNF BIT(1) #define JZ4780_I2C_STA_ACT BIT(0) -static const char * const jz4780_i2c_abrt_src[] = { - "ABRT_7B_ADDR_NOACK", - "ABRT_10ADDR1_NOACK", - "ABRT_10ADDR2_NOACK", - "ABRT_XDATA_NOACK", - "ABRT_GCALL_NOACK", - 
"ABRT_GCALL_READ", - "ABRT_HS_ACKD", - "SBYTE_ACKDET", - "ABRT_HS_NORSTRT", - "SBYTE_NORSTRT", - "ABRT_10B_RD_NORSTRT", - "ABRT_MASTER_DIS", - "ARB_LOST", - "SLVFLUSH_TXFIFO", - "SLV_ARBLOST", - "SLVRD_INTX", -}; - #define JZ4780_I2C_INTST_IGC BIT(11) #define JZ4780_I2C_INTST_ISTT BIT(10) #define JZ4780_I2C_INTST_ISTP BIT(9) @@ -538,21 +519,8 @@ static irqreturn_t jz4780_i2c_irq(int irqno, void *dev_id) static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src) { - int i; - - dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src); - dev_err(&i2c->adap.dev, "device addr=%x\n", - jz4780_i2c_readw(i2c, JZ4780_I2C_TAR)); - dev_err(&i2c->adap.dev, "send cmd count:%d %d\n", - i2c->cmd, i2c->cmd_buf[i2c->cmd]); - dev_err(&i2c->adap.dev, "receive data count:%d %d\n", - i2c->cmd, i2c->data_buf[i2c->cmd]); - - for (i = 0; i < 16; i++) { - if (src & BIT(i)) - dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n", - i, jz4780_i2c_abrt_src[i]); - } + dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n", + src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]); } static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c index 9e62f893958aa0016c14e74564b9025f8159c99e..81158ae8bfe361f3416f541651cfc0f2b3287762 100644 --- a/drivers/i2c/busses/i2c-st.c +++ b/drivers/i2c/busses/i2c-st.c @@ -437,6 +437,7 @@ static void st_i2c_wr_fill_tx_fifo(struct st_i2c_dev *i2c_dev) /** * st_i2c_rd_fill_tx_fifo() - Fill the Tx FIFO in read mode * @i2c_dev: Controller's private data + * @max: Maximum amount of data to fill into the Tx FIFO * * This functions fills the Tx FIFO with fixed pattern when * in read mode to trigger clock. diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index df9800aaeac71fc5778557d99121364dc6b61dbe..0d4d5dcf94f3901442a707aaa42a1725ca7b3456 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -352,10 +352,18 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { struct device *dev; + struct i2c_client *client; dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_find_match_device); - return dev ? 
i2c_verify_client(dev) : NULL; + if (!dev) + return NULL; + + client = i2c_verify_client(dev); + if (!client) + put_device(dev); + + return client; } static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index b127ed60c73364f34a114a8a3033aa42c906b8ca..9dde8390da09bd95e91c2885d1253612c4b82ff4 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -65,6 +65,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode) struct ide_timing t; u8 arttim = 0; + if (drive->dn >= ARRAY_SIZE(drwtim_regs)) + return; + ide_timing_compute(drive, mode, &t, T, 0); /* diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index a97affca18abe153be1ba4b3285d25390171e69c..0f57d45484d1d97933ca6dba3fbedcf8cfcf081e 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -114,6 +114,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) struct pci_dev *dev = to_pci_dev(hwif->dev); const u8 pio = drive->pio_mode - XFER_PIO_0; + if (drive->dn >= ARRAY_SIZE(drive_pci)) + return; + pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]); if (svwks_csb_check(dev)) { @@ -140,6 +143,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0; + if (drive->dn >= ARRAY_SIZE(drive_pci2)) + return; + pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing); pci_read_config_byte(dev, 0x54, &ultra_enable); diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 07246a6037e319d863b4ed35853bc6098f50e0f7..f64781d03d5d3503f88f16ec32aee34e26bd8d5c 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -543,7 +543,7 @@ static const struct iio_info ad7797_info = { .read_raw = &ad7793_read_raw, .write_raw = &ad7793_write_raw, .write_raw_get_fmt = &ad7793_write_raw_get_fmt, - .attrs = &ad7793_attribute_group, + .attrs = &ad7797_attribute_group, .validate_trigger = ad_sd_validate_trigger, .driver_module = THIS_MODULE, }; diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index a70ef7fec95f05065295a41280d7b17632aac7c9..34639ee2d2ce69f3d13ef539e142c1ac195c7330 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -300,6 +300,27 @@ static const struct iio_chan_spec at91_adc_channels[] = { + AT91_SAMA5D2_DIFF_CHAN_CNT + 1), }; +static int at91_adc_chan_xlate(struct iio_dev *indio_dev, int chan) +{ + int i; + + for (i = 0; i < indio_dev->num_channels; i++) { + if (indio_dev->channels[i].scan_index == chan) + return i; + } + return -EINVAL; +} + +static inline struct iio_chan_spec const * +at91_adc_chan_get(struct iio_dev *indio_dev, int chan) +{ + int index = at91_adc_chan_xlate(indio_dev, chan); + + if (index < 0) + return NULL; + return indio_dev->channels + index; +} + static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) { struct iio_dev *indio = iio_trigger_get_drvdata(trig); @@ -317,7 +338,24 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) at91_adc_writel(st, AT91_SAMA5D2_TRGR, status); for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { - struct iio_chan_spec const *chan = indio->channels + bit; + struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit); + u32 cor; + + if (!chan) + continue; + if (state) { + cor = at91_adc_readl(st, AT91_SAMA5D2_COR); + + if (chan->differential) + cor |= (BIT(chan->channel) | + BIT(chan->channel2)) 
<< + AT91_SAMA5D2_COR_DIFF_OFFSET; + else + cor &= ~(BIT(chan->channel) << + AT91_SAMA5D2_COR_DIFF_OFFSET); + + at91_adc_writel(st, AT91_SAMA5D2_COR, cor); + } if (state) { at91_adc_writel(st, AT91_SAMA5D2_CHER, @@ -398,8 +436,11 @@ static irqreturn_t at91_adc_trigger_handler(int irq, void *p) u8 bit; for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { - struct iio_chan_spec const *chan = indio->channels + bit; + struct iio_chan_spec const *chan = + at91_adc_chan_get(indio, bit); + if (!chan) + continue; st->buffer[i] = at91_adc_readl(st, chan->address); i++; } diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 258a4712167a26ac0dc4cccb249bc8025d997f69..3cfb2d4b24412f63351f39ef931669600b72ca74 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1311,8 +1311,30 @@ static unsigned int stm32_adc_dma_residue(struct stm32_adc *adc) static void stm32_adc_dma_buffer_done(void *data) { struct iio_dev *indio_dev = data; + struct stm32_adc *adc = iio_priv(indio_dev); + int residue = stm32_adc_dma_residue(adc); + + /* + * In DMA mode the trigger services of IIO are not used + * (e.g. no call to iio_trigger_poll). + * Calling irq handler associated to the hardware trigger is not + * relevant as the conversions have already been done. Data + * transfers are performed directly in DMA callback instead. + * This implementation avoids to call trigger irq handler that + * may sleep, in an atomic context (DMA irq handler context). + */ + dev_dbg(&indio_dev->dev, "%s bufi=%d\n", __func__, adc->bufi); - iio_trigger_poll_chained(indio_dev->trig); + while (residue >= indio_dev->scan_bytes) { + u16 *buffer = (u16 *)&adc->rx_buf[adc->bufi]; + + iio_push_to_buffers(indio_dev, buffer); + + residue -= indio_dev->scan_bytes; + adc->bufi += indio_dev->scan_bytes; + if (adc->bufi >= adc->rx_buf_sz) + adc->bufi = 0; + } } static int stm32_adc_dma_start(struct iio_dev *indio_dev) @@ -1648,6 +1670,7 @@ static int stm32_adc_probe(struct platform_device *pdev) { struct iio_dev *indio_dev; struct device *dev = &pdev->dev; + irqreturn_t (*handler)(int irq, void *p) = NULL; struct stm32_adc *adc; int ret; @@ -1730,9 +1753,11 @@ static int stm32_adc_probe(struct platform_device *pdev) if (ret < 0) goto err_clk_disable; + if (!adc->dma_chan) + handler = &stm32_adc_trigger_handler; + ret = iio_triggered_buffer_setup(indio_dev, - &iio_pollfunc_store_time, - &stm32_adc_trigger_handler, + &iio_pollfunc_store_time, handler, &stm32_adc_buffer_setup_ops); if (ret) { dev_err(&pdev->dev, "buffer setup failed\n"); diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index e89711b30ae8f01b8fce33a8e8eb781fef238e3b..36db28b9099f5337ea76e45c1e655e0777f16303 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -660,7 +660,7 @@ static int xadc_trigger_set_state(struct iio_trigger *trigger, bool state) spin_lock_irqsave(&xadc->lock, flags); xadc_read_reg(xadc, XADC_AXI_REG_IPIER, &val); - xadc_write_reg(xadc, XADC_AXI_REG_IPISR, val & XADC_AXI_INT_EOS); + xadc_write_reg(xadc, XADC_AXI_REG_IPISR, XADC_AXI_INT_EOS); if (state) val |= XADC_AXI_INT_EOS; else @@ -709,13 +709,14 @@ static int xadc_power_adc_b(struct xadc *xadc, unsigned int seq_mode) { uint16_t val; + /* Powerdown the ADC-B when it is not needed. 
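Only the simultaneous and independent sequencer modes use ADC-B, so every
other mode sets the powerdown bit.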
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index e89711b30ae8f01b8fce33a8e8eb781fef238e3b..36db28b9099f5337ea76e45c1e655e0777f16303 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -660,7 +660,7 @@ static int xadc_trigger_set_state(struct iio_trigger *trigger, bool state) spin_lock_irqsave(&xadc->lock, flags); xadc_read_reg(xadc, XADC_AXI_REG_IPIER, &val); - xadc_write_reg(xadc, XADC_AXI_REG_IPISR, val & XADC_AXI_INT_EOS); + xadc_write_reg(xadc, XADC_AXI_REG_IPISR, XADC_AXI_INT_EOS); if (state) val |= XADC_AXI_INT_EOS; else @@ -709,13 +709,14 @@ static int xadc_power_adc_b(struct xadc *xadc, unsigned int seq_mode) { uint16_t val; + /* Power down the ADC-B when it is not needed. */ switch (seq_mode) { case XADC_CONF1_SEQ_SIMULTANEOUS: case XADC_CONF1_SEQ_INDEPENDENT: - val = XADC_CONF2_PD_ADC_B; + val = 0; break; default: - val = 0; + val = XADC_CONF2_PD_ADC_B; break; } @@ -784,6 +785,16 @@ static int xadc_preenable(struct iio_dev *indio_dev) if (ret) goto err; + /* + * In simultaneous mode the upper and lower aux channels are sampled at + * the same time. In this mode the upper 8 bits in the sequencer + * register are don't care and the lower 8 bits control two channels + * each. As such we must set the bit if either the channel in the lower + * group or the upper group is enabled. + */ + if (seq_mode == XADC_CONF1_SEQ_SIMULTANEOUS) + scan_mask = ((scan_mask >> 8) | scan_mask) & 0xff0000; + ret = xadc_write_adc_reg(xadc, XADC_REG_SEQ(1), scan_mask >> 16); if (ret) goto err; diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 0bff76e96950e685ed08e26988ba8057d2eec5cb..283ecd4ea800d5ab7bdea4ae69bf6eefd854c9e7 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -563,7 +563,7 @@ static int ak8974_read_raw(struct iio_dev *indio_dev, * We read all axes and discard all but one, for optimized * reading, use the triggered buffer. */ - *val = le16_to_cpu(hw_values[chan->address]); + *val = (s16)le16_to_cpu(hw_values[chan->address]); ret = IIO_VAL_INT; } diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index eb212f8c88793b2c1029f087b40733039b5d9f96..0e4da570f22f630d3b000a87681e7bee821703ad 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -161,7 +161,8 @@ static int stm32_timer_start(struct stm32_timer_trigger *priv, return 0; } -static void stm32_timer_stop(struct stm32_timer_trigger *priv) +static void stm32_timer_stop(struct stm32_timer_trigger *priv, + struct iio_trigger *trig) { u32 ccer, cr1; @@ -179,6 +180,12 @@ static void stm32_timer_stop(struct stm32_timer_trigger *priv) regmap_write(priv->regmap, TIM_PSC, 0); regmap_write(priv->regmap, TIM_ARR, 0); + /* Force disable master mode */ + if (stm32_timer_is_trgo2_name(trig->name)) + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, 0); + else + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS, 0); + /* Make sure that registers are updated */ regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG); } @@ -197,7 +204,7 @@ static ssize_t stm32_tt_store_frequency(struct device *dev, return ret; if (freq == 0) { - stm32_timer_stop(priv); + stm32_timer_stop(priv, trig); } else { ret = stm32_timer_start(priv, trig, freq); if (ret) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2af79e4f3235934252506d4e51c2f6833f8a3f67..80a8eb7e5d6ec1df23b236e9317963f75a075203 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1143,6 +1143,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } atomic_inc(&cm_id_priv->refcount); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1614f6f3677c0b50560d1be5b61d7d84e8f349a2..d901591db9c8eb62d9f5630d567d8437f3a33975 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2661,6 +2661,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; +
route->num_paths = 0; err1: kfree(work); return ret; diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 30d7277249b8abb63f03f5d78bca665b0682727f..16b0c10348e84194880d5fe1f936d714de980ac6 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -158,8 +158,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index a3dd88c57be78295ee23bd538013ddc275cfbd1d..f4770601e6bfa72d1ccb5def067983454ceee217 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -338,27 +338,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; - - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? - qp_attr->pkey_index : - qp_pps->main.pkey_index; - } - new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) new_pps->main.port_num = qp_pps->main.port_num; + + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) new_pps->main.pkey_index = qp_pps->main.pkey_index; - if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) - new_pps->main.state = IB_PORT_PKEY_VALID; - } + + if (((qp_attr_mask & IB_QP_PKEY_INDEX) && + (qp_attr_mask & IB_QP_PORT)) || + (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)) + new_pps->main.state = IB_PORT_PKEY_VALID; if (qp_attr_mask & IB_QP_ALT_PATH) { new_pps->alt.port_num = qp_attr->alt_port_num; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4a0b7c00347718db316989f8c5331ff652e322d3..cb5785dda524e5c7aad967da86104870ed645aa3 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1686,6 +1686,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4246,6 +4254,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 50b8645d0b876dbf6d58f56d677e3cfcc7984e55..a88ef2433cea28c1c33e85244fac2356a0104f46 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ 
b/drivers/infiniband/hw/hfi1/chip.h @@ -864,6 +864,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 72c836b826ca84d29fffceceea036b753d095722..7aa1aabb7a43c23eeb30f4f099f69d1c362661c1 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -710,6 +710,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 76861a8b5c1ec28ccfcdce9a6e9a7ff9909b5260..b3ab803bf8b1e41ad85dd433f796aff32584f93b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -195,23 +195,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) { - fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); - fd->dd = dd; - kobject_get(&fd->dd->kobj); - fp->private_data = fd; - } else { - fp->private_data = NULL; - - if (atomic_dec_and_test(&dd->user_refcount)) - complete(&dd->user_comp); - - return -ENOMEM; - } - + if (!fd || init_srcu_struct(&fd->pq_srcu)) + goto nomem; + spin_lock_init(&fd->pq_rcu_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + mmgrab(fd->mm); + fd->dd = dd; + kobject_get(&fd->dd->kobj); + fp->private_data = fd; return 0; +nomem: + kfree(fd); + fp->private_data = NULL; + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + return -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -417,21 +418,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; int done = 0, reqs = 0; unsigned long dim = from->nr_segs; + int idx; - if (!cq || !pq) + idx = srcu_read_lock(&fd->pq_srcu); + pq = srcu_dereference(fd->pq, &fd->pq_srcu); + if (!cq || !pq) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EIO; + } - if (!iter_is_iovec(from) || !dim) + if (!iter_is_iovec(from) || !dim) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EINVAL; + } trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { + srcu_read_unlock(&fd->pq_srcu, idx); return -ENOSPC; + } while (dim) { int ret; @@ -449,6 +459,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) reqs++; } + srcu_read_unlock(&fd->pq_srcu, idx); return reqs; } @@ -824,6 +835,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); + cleanup_srcu_struct(&fdata->pq_srcu); kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index af550c1767e31ea7a9501aff0e18e4024e5f6686..cf9bc95d80396b74803494e255a06428a4f6f584 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ 
b/drivers/infiniband/hw/hfi1/hfi.h @@ -1043,6 +1043,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info */ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; @@ -1353,10 +1355,13 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct srcu_struct pq_srcu; struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; struct hfi1_user_sdma_comp_q *cq; - struct hfi1_user_sdma_pkt_q *pq; + /* update side lock for SRCU */ + spinlock_t pq_rcu_lock; + struct hfi1_user_sdma_pkt_q __rcu *pq; u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num;
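
The hfi.h hunk above is the heart of a use-after-free fix: fd->pq becomes an SRCU-protected pointer, with pq_rcu_lock serializing updaters, and the file_ops.c and user_sdma.c hunks elsewhere in this series read and retire it accordingly. As a self-contained illustration of that canonical SRCU pattern (the struct and function names here are invented for the sketch, not taken from the driver):

#include <linux/errno.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

struct pkt_q { int busy; };

struct filedata {
	struct srcu_struct pq_srcu;
	spinlock_t pq_rcu_lock;		/* update-side lock for pq */
	struct pkt_q __rcu *pq;
};

/* Reader: may race with teardown but never touches a freed pq. */
static int reader(struct filedata *fd)
{
	struct pkt_q *pq;
	int idx, ret = -EIO;

	idx = srcu_read_lock(&fd->pq_srcu);
	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
	if (pq)
		ret = 0;	/* ... submit I/O through pq ... */
	srcu_read_unlock(&fd->pq_srcu, idx);
	return ret;
}

/* Updater: unpublish the pointer, wait out readers, then free. */
static void teardown(struct filedata *fd)
{
	struct pkt_q *pq;

	spin_lock(&fd->pq_rcu_lock);
	pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
				    lockdep_is_held(&fd->pq_rcu_lock));
	rcu_assign_pointer(fd->pq, NULL);
	spin_unlock(&fd->pq_rcu_lock);
	if (!pq)
		return;
	synchronize_srcu(&fd->pq_srcu);	/* all readers have drained */
	kfree(pq);
}
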
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 25e867393463e5cc634d9a304e85dba8f5ddb89d..e3e8d65646e34cd80adcd0f1a56d4f053990e888 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -670,7 +670,11 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sc2vl sysfs info, (err %d) port %u\n", ret, port_num); - goto bail; + /* + * Based on the documentation for kobject_init_and_add(), the + * caller should call kobject_put even if this call fails. + */ + goto bail_sc2vl; } kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD); @@ -680,7 +684,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sl2sc sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sc2vl; + goto bail_sl2sc; } kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD); @@ -690,7 +694,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping vl2mtu sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sl2sc; + goto bail_vl2mtu; } kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD); @@ -700,7 +704,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping Congestion Control sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_vl2mtu; + goto bail_cc; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); @@ -738,7 +742,6 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, kobject_put(&ppd->sl2sc_kobj); bail_sc2vl: kobject_put(&ppd->sc2vl_kobj); -bail: return ret; } @@ -858,8 +861,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) device_remove_file(&dev->dev, hfi1_attributes[i]); - for (i = 0; i < dd->num_sdma; i++) - kobject_del(&dd->per_sdma[i].kobj); + /* + * The function kobject_put() will call kobject_del() if the kobject + * has been added successfully. The sysfs files created under the + * kobject directory will also be removed during the process. + */ + for (; i >= 0; i--) + kobject_put(&dd->per_sdma[i].kobj); return ret; } @@ -872,6 +880,10 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; int i; + /* Unwind operations in hfi1_verbs_register_sysfs() */ + for (i = 0; i < dd->num_sdma; i++) + kobject_put(&dd->per_sdma[i].kobj); + for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b38e3808836ca735c451fdd11c4275e395c751d0..c6d085e1c10d242025296b9bea3c7bb449451f75 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -90,9 +90,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - spin_lock_init(&fd->tid_lock); - spin_lock_init(&fd->invalid_lock); - fd->entry_to_rb = kcalloc(uctxt->expected_count, sizeof(struct rb_node *), GFP_KERNEL); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 4854a4a453b5f8340e1cbb38ec55a73620c0e2ab..f23d47194c1266b90c094a084fcdf365a49e95a2 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) return -ENOMEM; - pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto pq_mmu_fail; } - fd->pq = pq; + rcu_assign_pointer(fd->pq, pq); fd->cq = cq; return 0; @@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); - pq = fd->pq; + spin_lock(&fd->pq_rcu_lock); + pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, + lockdep_is_held(&fd->pq_rcu_lock)); if (pq) { + rcu_assign_pointer(fd->pq, NULL); + spin_unlock(&fd->pq_rcu_lock); + synchronize_srcu(&fd->pq_srcu); + /* at this point there can be no more new requests */ if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); @@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); - fd->pq = NULL; + } else { + spin_unlock(&fd->pq_rcu_lock); } if (fd->cq) { vfree(fd->cq->comps); @@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, { int ret = 0, i; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq = + srcu_dereference(fd->pq, &fd->pq_srcu); struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; unsigned long idx = 0; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ad78b471c1129cd4afabb2dcbe94e5350be2f68e..b962dbcfe9a78194447017ca10b61756902e152d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -593,10 +593,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -605,6 +606,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7e73a1a6cb67146cc545b2296db034a253947ccf..3f8511104c5b6319d9fc614c5e7073a53681fcf4 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1614,8 +1614,9 @@ static int __mlx4_ib_create_default_rules( int i; for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) { + union ib_flow_spec ib_spec = {}; int ret; - union ib_flow_spec ib_spec; + switch (pdefault_rules->rules_create_list[i]) { case 0: /* no rule */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 84c962820aa2a891d6c3c1655eddc3536f1e398c..d835ef2ce23c09718c59d8d719c42559b5e12fb5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4362,7 +4362,9 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev, rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f); rdma_ah_set_static_rate(ah_attr, path->static_rate ? path->static_rate - 5 : 0); - if (path->grh_mlid & (1 << 7)) { + + if (path->grh_mlid & (1 << 7) || + ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { u32 tc_fl = be32_to_cpu(path->tclass_flowlabel); rdma_ah_set_grh(ah_attr, NULL, @@ -4896,6 +4898,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO) && + init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) + return ERR_PTR(-EPERM); + dev = to_mdev(pd->device); switch (init_attr->wq_type) { case IB_WQT_RQ: diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 350bc29a066f0596d430d9cc935dcee8cf767937..b473df8eea1ade06e0ac3a734013b8406115f3e2 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -360,8 +360,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. 
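
Both multicast receive paths above, hfi1_handle_packet() and qib_ib_rcv(), iterate mcast->qp_list with list_for_each_entry_rcu(), which is only safe inside an RCU read-side critical section; the fix brackets the walk with rcu_read_lock()/rcu_read_unlock(). Reduced to its essentials (the item type and deliver callback are invented for the sketch):

#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct mcast_qp {
	struct list_head list;
	void *qp;
};

static void deliver_to_group(struct list_head *qp_list,
			     void (*deliver)(void *qp))
{
	struct mcast_qp *p;

	rcu_read_lock();	/* entries cannot be freed while held */
	list_for_each_entry_rcu(p, qp_list, list)
		deliver(p->qp);
	rcu_read_unlock();	/* writers may now reclaim removals */
}

Without the lock, a concurrent rvt_multicast_detach() could free an entry the walker is still dereferencing.
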
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index d1cc89f6f2e33e3f844d1138dedfb2b5cb13c5a5..46c8a66731e6ca04f955ae4eae04539e892a4669 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -408,7 +408,7 @@ struct rxe_dev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* guard mmap_offset */ - int mmap_offset; + u64 mmap_offset; atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9b5691f306a230262103bc25ada8d897838ae14d..ee3f630c92179dac8d4a7351bd43e459d9d72fc2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2582,6 +2582,17 @@ isert_wait4logout(struct isert_conn *isert_conn) } } +static void +isert_wait4cmds(struct iscsi_conn *conn) +{ + isert_info("iscsi_conn %p\n", conn); + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); + } +} + /** * isert_put_unsol_pending_cmds() - Drop commands waiting for * unsolicited dataout */ @@ -2629,6 +2640,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) ib_drain_qp(isert_conn->qp); isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 111a71190547d69a59ee97798240ae060e9b095c..2bca84f4c2b2ea08e9bffa14215598d12745406b 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -149,7 +149,6 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0042", /* Yoga */ "LEN0045", "LEN0047", - "LEN0049", "LEN2000", /* S540 */ "LEN2001", /* Edge E431 */ "LEN2002", /* Edge E531 */ @@ -169,9 +168,11 @@ static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ + "LEN0049", /* Yoga 11e */ "LEN004a", /* W541 */ "LEN005b", /* P50 */ "LEN005e", /* T560 */ + "LEN006c", /* T470s */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ @@ -182,11 +183,13 @@ static const char * const smbus_pnp_ids[] = { "LEN0097", /* X280 -> ALPS trackpoint */ "LEN009b", /* T580 */ "LEN200f", /* T450s */ + "LEN2044", /* L470 */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ + "SYN3257", /* HP Envy 13-ad105ng */ NULL }; diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 136f6e7bf797767256e66c1c083cb80c55cd7a1b..0d0f977a2f393dcabc09e01c5178e2f2117354ca 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -534,6 +534,17 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"), }, }, + { + /* + * Acer Aspire 5738z + * Touchpad stops working in mux mode when dis- + re-enabled + * with the touchpad enable/disable toggle hotkey + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5738"), + }, + }, { } }; diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index
5bf63f76dddac657083bd3ab2ab2b6b0160dd3da..4eff5b44640cf9f57d107548588d432cfcae34d6 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -888,6 +888,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, { const struct edt_i2c_chip_data *chip_data; struct edt_ft5x06_ts_data *tsdata; + u8 buf[2] = { 0xfc, 0x00 }; struct input_dev *input; unsigned long irq_flags; int error; @@ -957,6 +958,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, return error; } + /* + * Dummy read access. EP0700MLP1 returns bogus data on the first + * register read access and ignores writes. + */ + edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf); + edt_ft5x06_ts_set_regs(tsdata); edt_ft5x06_ts_get_defaults(&client->dev, tsdata); edt_ft5x06_ts_get_parameters(tsdata); diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 4f1d3fd5d4121dbd6c517eda7c862ea9ae6b802e..7da44956555e5b80b03379f707045e2956c3be2b 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -441,7 +441,7 @@ static int raydium_i2c_write_object(struct i2c_client *client, return 0; } -static bool raydium_i2c_boot_trigger(struct i2c_client *client) +static int raydium_i2c_boot_trigger(struct i2c_client *client) { static const u8 cmd[7][6] = { { 0x08, 0x0C, 0x09, 0x00, 0x50, 0xD7 }, @@ -469,7 +469,7 @@ static bool raydium_i2c_boot_trigger(struct i2c_client *client) return 0; } -static bool raydium_i2c_fw_trigger(struct i2c_client *client) +static int raydium_i2c_fw_trigger(struct i2c_client *client) { static const u8 cmd[5][11] = { { 0, 0x09, 0x71, 0x0C, 0x09, 0x00, 0x50, 0xD7, 0, 0, 0 }, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 4d2920988d6074c4bdb386bacbd88e3aa306f802..6c228144b3da528564d78aa51cbe77554bb9e36d 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2809,7 +2809,7 @@ static int __init parse_amd_iommu_intr(char *str) { for (; *str; ++str) { if (strncmp(str, "legacy", 6) == 0) { - amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; break; } if (strncmp(str, "vapic", 5) == 0) { diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 3054c0971759fb6dede7411c7a93aaa2d9407e43..74c8638aac2b95a035a08604fde95f8eb62261e0 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -348,7 +348,7 @@ #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) #define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) -#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL) +#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL) #define DTE_GCR3_INDEX_A 0 #define DTE_GCR3_INDEX_B 1 diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index cabacbbae563b52fe4cc8cac9063113498ed4da2..a5711b6cb87ea2f83616a817d3a98cbc88ae3ec4 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1145,7 +1145,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, } arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 3d81e2c8a97e04078797dd9ce2aa121e556252de..b6a62a727b41ae781b99170fb7a48d68eeab96fb 100644 --- 
a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -212,15 +212,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, start -= iova_offset(iovad, start); num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); - if (!msi_page) - return -ENOMEM; - for (i = 0; i < num_pages; i++) { - msi_page[i].phys = start; - msi_page[i].iova = start; - INIT_LIST_HEAD(&msi_page[i].list); - list_add(&msi_page[i].list, &cookie->msi_page_list); + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + msi_page->phys = start; + msi_page->iova = start; + INIT_LIST_HEAD(&msi_page->list); + list_add(&msi_page->list, &cookie->msi_page_list); start += iovad->granule; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 38d0128b8135d2f5a727ae93a375d1094367d195..1f527ca609550bc0e175afdc0fe68bf44b8fd78e 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -139,6 +140,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) BUG_ON(dev->is_virtfn); + /* + * Ignore devices that have a domain number higher than what can + * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 + */ + if (pci_domain_nr(dev->bus) > U16_MAX) + return NULL; + /* Only generate path[] for device addition event */ if (event == BUS_NOTIFY_ADD_DEVICE) for (tmp = dev; tmp; tmp = tmp->bus->self) @@ -451,12 +459,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, /* Check for NUL termination within the designated length */ if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; ANDD object name is not NUL-terminated\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return -EINVAL; } pr_info("ANDD device: %x name: %s\n", andd->device_number, @@ -482,14 +491,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) return 0; } } - WARN_TAINT( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, + rhsa->base_address, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return 0; } @@ -835,14 +844,14 @@ int __init dmar_table_init(void) static void warn_invalid_dmar(u64 addr, const char *message) { - WARN_TAINT_ONCE( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn_once(FW_BUG "Your BIOS is broken; DMAR reported at address %llx%s!\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", addr, message, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); } static int __ref diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b48666849dbedaff02bbf812647a865cbd0c0ac0..db1b546134f5944ceffa7445740f4677f9e3e424 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3984,10 +3984,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) /* we know 
that this iommu should be at offset 0xa000 from vtbar */ drhd = dmar_find_matched_drhd_unit(pdev); - if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, - TAINT_FIRMWARE_WORKAROUND, - "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { + pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); @@ -5123,8 +5124,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, u64 phys = 0; pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); - if (pte) - phys = dma_pte_addr(pte); + if (pte && dma_pte_present(pte)) + phys = dma_pte_addr(pte) + + (iova & (BIT_MASK(level_to_offset_bits(level) + + VTD_PAGE_SHIFT) - 1)); return phys; } diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index f5573bb9f450e2a19ec22fde1b001fff3c8d8a02..837459762eb39b9ddfd3263c95690d030ca4ba76 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -613,14 +613,15 @@ static irqreturn_t prq_event_thread(int irq, void *d) * any faults on kernel addresses. */ if (!svm->mm) goto bad_req; - /* If the mm is already defunct, don't handle faults. */ - if (!mmget_not_zero(svm->mm)) - goto bad_req; /* If address is not canonical, return invalid response */ if (!is_canonical_address(address)) goto bad_req; + /* If the mm is already defunct, don't handle faults. */ + if (!mmget_not_zero(svm->mm)) + goto bad_req; + down_read(&svm->mm->mmap_sem); vma = find_extend_vma(svm->mm, address); if (!vma || address < vma->vm_start) goto bad_req;
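
The intel-iommu hunk above fixes intel_iommu_iova_to_phys() for superpage mappings: besides checking that the PTE is actually present, it adds the offset of the IOVA within the page mapped at 'level' back onto the page base taken from the PTE. A standalone sketch of that arithmetic (plain C; the 4 KiB base page and 9 bits per level follow the VT-d driver's convention, where level 1 is a 4 KiB leaf and level 2 a 2 MiB superpage):

#include <stdint.h>

#define PAGE_SHIFT	12	/* 4 KiB base pages */
#define LEVEL_STRIDE	9	/* page-table bits consumed per level */

/* Low bits of the IOVA that fall inside a page mapped at 'level'. */
static uint64_t page_offset(uint64_t iova, int level)
{
	unsigned int bits = PAGE_SHIFT + (level - 1) * LEVEL_STRIDE;

	return iova & ((UINT64_C(1) << bits) - 1);
}

/* level 1: 4 KiB page, level 2: 2 MiB superpage, level 3: 1 GiB. */
static uint64_t phys_addr(uint64_t pte_page_base, uint64_t iova, int level)
{
	return pte_page_base + page_offset(iova, level);
}

With the old code, a lookup inside a 2 MiB superpage returned the superpage base for every IOVA in it, silently dropping the low 21 bits.
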
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index c8a587d034b0d7e75d276af1b8c7cdc9e02e5aad..920a5df319bc4c650a319b3e3e51bf86834103be 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -327,21 +327,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) { struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ - return; - iommu_put_dma_cookie(domain); - /* NOTE: unmap can be called after client device is powered off, - * for example, with GPUs or anything involving dma-buf. So we - * cannot rely on the device_link. Make sure the IOMMU is on to - * avoid unclocked accesses in the TLB inv path: - */ - pm_runtime_get_sync(qcom_domain->iommu->dev); - - free_io_pgtable_ops(qcom_domain->pgtbl_ops); - - pm_runtime_put_sync(qcom_domain->iommu->dev); + if (qcom_domain->iommu) { + /* + * NOTE: unmap can be called after client device is powered + * off, for example, with GPUs or anything involving dma-buf. + * So we cannot rely on the device_link. Make sure the IOMMU + * is on to avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + pm_runtime_put_sync(qcom_domain->iommu->dev); + } kfree(qcom_domain); } @@ -386,7 +384,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); unsigned i; - if (!qcom_domain->iommu) + if (WARN_ON(!qcom_domain->iommu)) return; pm_runtime_get_sync(qcom_iommu->dev); @@ -397,8 +395,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); } pm_runtime_put_sync(qcom_iommu->dev); - - qcom_domain->iommu = NULL; } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, @@ -779,8 +775,11 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) qcom_iommu->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res) + if (res) { qcom_iommu->local_base = devm_ioremap_resource(dev, res); + if (IS_ERR(qcom_iommu->local_base)) + return PTR_ERR(qcom_iommu->local_base); + } qcom_iommu->iface_clk = devm_clk_get(dev, "iface"); if (IS_ERR(qcom_iommu->iface_clk)) { diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 52238e6bed392dd068cd4a1b2118af984f13196a..84b23d902d5b86875cd5be3d9fc858a5d4329665 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -527,7 +527,7 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { its_encode_cmd(cmd, GITS_CMD_INVALL); - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id); + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id); its_fixup_cmd(cmd); @@ -2591,12 +2591,18 @@ static int its_vpe_set_irqchip_state(struct irq_data *d, return 0; } +static int its_vpe_retrigger(struct irq_data *d) +{ + return !its_vpe_set_irqchip_state(d, IRQCHIP_STATE_PENDING, true); +} + static struct irq_chip its_vpe_irq_chip = { .name = "GICv4-vpe", .irq_mask = its_vpe_mask_irq, .irq_unmask = its_vpe_unmask_irq, .irq_eoi = irq_chip_eoi_parent, .irq_set_affinity = its_vpe_set_affinity, + .irq_retrigger = its_vpe_retrigger, .irq_set_irqchip_state = its_vpe_set_irqchip_state, .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity, }; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index b99af4dcdabdb906cb817c94f0dc2982cda7d645..f3c81a86e05619b872fce879f773c62e0c638717 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -570,6 +571,8 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs err = handle_domain_irq(gic_data.domain, irqnr, regs); if (err) { WARN_ONCE(true, "Unexpected interrupt received!\n"); + log_abnormal_wakeup_reason( + "unexpected HW IRQ %u", irqnr); if (static_key_true(&supports_deactivate)) { if (irqnr < 8192) gic_write_dir(irqnr); @@ -1453,6 +1456,7 @@ static struct struct redist_region *redist_regs; u32 nr_redist_regions; bool single_redist; + int enabled_rdists; u32 maint_irq; int maint_irq_mode; phys_addr_t vcpu_base; @@ -1547,8 +1551,10 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, * If GICC is enabled and has a valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags &
ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + acpi_data.enabled_rdists++; return 0; + } /* * It's perfectly valid firmware can pass disabled GICC entry, driver @@ -1578,8 +1584,10 @@ static int __init gic_acpi_count_gicr_regions(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, gic_acpi_match_gicc, 0); - if (count > 0) + if (count > 0) { acpi_data.single_redist = true; + count = acpi_data.enabled_rdists; + } return count; } diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 98b6e1d4b1a68cf8a247f4c75a67d09fd11b6b08..c98358be0bc8b6a4ef0fef7ab84ac531f7fc8ec4 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -231,10 +231,16 @@ static int mbigen_irq_domain_alloc(struct irq_domain *domain, return 0; } +static void mbigen_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + platform_msi_domain_free(domain, virq, nr_irqs); +} + static const struct irq_domain_ops mbigen_domain_ops = { .translate = mbigen_domain_translate, .alloc = mbigen_irq_domain_alloc, - .free = irq_domain_free_irqs_common, + .free = mbigen_irq_domain_free, }; static int mbigen_of_create_domain(struct platform_device *pdev, @@ -381,6 +387,7 @@ static struct platform_driver mbigen_platform_driver = { .name = "Hisilicon MBIGEN-V2", .of_match_table = mbigen_of_match, .acpi_match_table = ACPI_PTR(mbigen_acpi_match), + .suppress_bind_attrs = true, }, .probe = mbigen_device_probe, }; diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index 928858dada756255cbb4c27c2ae0c191373e6513..f1386733d3bc1ded71f0c7d6ef1bf6692b1cdba1 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -68,12 +69,16 @@ static void fpga_irq_unmask(struct irq_data *d) static void fpga_irq_handle(struct irq_desc *desc) { + struct irq_chip *chip = irq_desc_get_chip(desc); struct fpga_irq_data *f = irq_desc_get_handler_data(desc); - u32 status = readl(f->base + IRQ_STATUS); + u32 status; + + chained_irq_enter(chip, desc); + status = readl(f->base + IRQ_STATUS); if (status == 0) { do_bad_IRQ(desc); - return; + goto out; } do { @@ -82,6 +87,9 @@ static void fpga_irq_handle(struct irq_desc *desc) status &= ~(1 << irq); generic_handle_irq(irq_find_mapping(f->domain, irq)); } while (status); + +out: + chained_irq_exit(chip, desc); } /* @@ -204,6 +212,9 @@ int __init fpga_irq_of_init(struct device_node *node, if (of_property_read_u32(node, "valid-mask", &valid_mask)) valid_mask = 0; + writel(clear_mask, base + IRQ_ENABLE_CLEAR); + writel(clear_mask, base + FIQ_ENABLE_CLEAR); + /* Some chips are cascaded from a parent IRQ */ parent_irq = irq_of_parse_and_map(node, 0); if (!parent_irq) { @@ -213,9 +224,6 @@ int __init fpga_irq_of_init(struct device_node *node, fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node); - writel(clear_mask, base + IRQ_ENABLE_CLEAR); - writel(clear_mask, base + FIQ_ENABLE_CLEAR); - /* * On Versatile AB/PB, some secondary interrupts have a direct * pass-thru to the primary controller for IRQs 20 and 22-31 which need diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c index 3bf9a127181927d84f374c1b60cc9373dafad149..88c7313cf869383d9bad69fcdd0a940158aacf71 100644 --- a/drivers/leds/leds-pca963x.c +++ b/drivers/leds/leds-pca963x.c @@ -43,6 +43,8 @@ #define PCA963X_LED_PWM 0x2 /* Controlled through PWM */ #define PCA963X_LED_GRP_PWM 0x3 /* 
Controlled through PWM/GRPPWM */ +#define PCA963X_MODE2_OUTDRV 0x04 /* Open-drain or totem pole */ +#define PCA963X_MODE2_INVRT 0x10 /* Normal or inverted direction */ #define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */ #define PCA963X_MODE1 0x00 @@ -462,12 +464,12 @@ static int pca963x_probe(struct i2c_client *client, PCA963X_MODE2); /* Configure output: open-drain or totem pole (push-pull) */ if (pdata->outdrv == PCA963X_OPEN_DRAIN) - mode2 |= 0x01; + mode2 &= ~PCA963X_MODE2_OUTDRV; else - mode2 |= 0x05; + mode2 |= PCA963X_MODE2_OUTDRV; /* Configure direction: normal or inverted */ if (pdata->dir == PCA963X_INVERTED) - mode2 |= 0x10; + mode2 |= PCA963X_MODE2_INVRT; i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2, mode2); } diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 8d1964b472e7e830d4ee35acaaf7cec8033907d6..0bfde500af196078b048f23b9f441edc63026ad8 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -381,7 +381,8 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *); /* Bkey utility code */ -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \ + (unsigned int)(i)->keys) static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) { diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index c82578af56a5bbff035a272f258327a7fce6ecfd..2ea0360108e1d41b57a39b5c5df715c52a641442 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -20,8 +20,13 @@ struct dm_bio_details { struct gendisk *bi_disk; u8 bi_partno; + int __bi_remaining; unsigned long bi_flags; struct bvec_iter bi_iter; + bio_end_io_t *bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; +#endif }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) @@ -30,6 +35,11 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) bd->bi_partno = bio->bi_partno; bd->bi_flags = bio->bi_flags; bd->bi_iter = bio->bi_iter; + bd->__bi_remaining = atomic_read(&bio->__bi_remaining); + bd->bi_end_io = bio->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bd->bi_integrity = bio_integrity(bio); +#endif } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) @@ -38,6 +48,11 @@ static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) bio->bi_partno = bd->bi_partno; bio->bi_flags = bd->bi_flags; bio->bi_iter = bd->bi_iter; + atomic_set(&bio->__bi_remaining, bd->__bi_remaining); + bio->bi_end_io = bd->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bio->bi_integrity = bd->bi_integrity; +#endif } #endif diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index b92da30a3d42b0cb68b7c873a2b680c5af8f9888..940c6944f0cf7e2b1b10f101b31800ef30c5847c 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -658,6 +658,7 @@ static int dm_bow_ctr(struct dm_target *ti, unsigned int argc, char **argv) bc->dev->bdev->bd_queue->limits.max_discard_sectors = 1 << 15; bc->forward_trims = false; } else { + bc->dev->bdev->bd_queue->limits.discard_granularity = 1 << 12; bc->forward_trims = true; } @@ -792,6 +793,7 @@ static int prepare_unchanged_range(struct bow_context *bc, struct bow_range *br, */ original_type = br->type; sector0 = backup_br->sector; + bc->trims_total -= range_size(backup_br); if (backup_br->type == TRIMMED) list_del(&backup_br->trimmed_list); backup_br->type = br->type == SECTOR0_CURRENT ? 
SECTOR0_CURRENT diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b5f541112fca0b2307bca1648c3d1db02afe88eb..69cdb29ef6be96b49bd096c3c9e58a4a2e22c763 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2971,8 +2971,8 @@ static void cache_postsuspend(struct dm_target *ti) prevent_background_work(cache); BUG_ON(atomic_read(&cache->nr_io_migrations)); - cancel_delayed_work(&cache->waker); - flush_workqueue(cache->wq); + cancel_delayed_work_sync(&cache->waker); + drain_workqueue(cache->wq); WARN_ON(cache->tracker.in_flight); /* diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b1b68e01b889cd42823252832a572d234965a8ad..53cd31199f212c225c303a43d3024c33ffe9bbae 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -70,6 +70,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, arg_name = dm_shift_arg(as); argc--; + if (!arg_name) { + ti->error = "Insufficient feature arguments"; + return -EINVAL; + } + /* * drop_writes */ diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 23f0f4eaaa2e7b7a70376bc90e6188dde2e36079..b6ca5b1100db33af9ad9e7b132cf869eea9e0438 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -187,6 +187,7 @@ struct dm_integrity_c { struct rb_root in_progress; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; + struct workqueue_struct *offload_wq; unsigned char commit_seq; commit_id_t commit_ids[N_COMMIT_IDS]; @@ -1157,7 +1158,7 @@ static void dec_in_flight(struct dm_integrity_io *dio) dio->range.logical_sector += dio->range.n_sectors; bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } do_endio_flush(ic, dio); @@ -1577,7 +1578,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map if (need_sync_io && from_map) { INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->metadata_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } @@ -3005,6 +3006,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, + METADATA_WORKQUEUE_MAX_ACTIVE); + if (!ic->offload_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); if (!ic->commit_wq) { ti->error = "Cannot allocate workqueue"; @@ -3189,6 +3198,8 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) destroy_workqueue(ic->wait_wq); + if (ic->offload_wq) + destroy_workqueue(ic->offload_wq); if (ic->commit_wq) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 776a4f77f76ce6031d54401a23dd39ca89ca6524..052419ac1ce622fcb61853548f23208776bbc9a6 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -439,7 +439,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, fio->level++; if (type == DM_VERITY_BLOCK_TYPE_METADATA) - block += v->data_blocks; + block = block - v->hash_start + v->data_blocks; /* * For RS(M, N), the continuous FEC data is divided into blocks of N @@ -556,6 +556,7 @@ void verity_fec_dtr(struct dm_verity *v) mempool_destroy(f->rs_pool); mempool_destroy(f->prealloc_pool); mempool_destroy(f->extra_pool); + 
mempool_destroy(f->output_pool); kmem_cache_destroy(f->cache); if (f->data_bufio) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index e3b67b145027bf79000d3e772f3f8e4525c057dc..4d658a0c602587358caca34268f04eb990754227 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1105,7 +1105,6 @@ static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone, if (blkz->type == BLK_ZONE_TYPE_CONVENTIONAL) { set_bit(DMZ_RND, &zone->flags); - zmd->nr_rnd_zones++; } else if (blkz->type == BLK_ZONE_TYPE_SEQWRITE_REQ || blkz->type == BLK_ZONE_TYPE_SEQWRITE_PREF) { set_bit(DMZ_SEQ, &zone->flags); diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c index 8a430640c85d512038d4de97960d12768c728190..1a20d0d558d3e88a0cb0b12dda532864e13cff16 100644 --- a/drivers/media/i2c/mt9v032.c +++ b/drivers/media/i2c/mt9v032.c @@ -423,10 +423,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_mbus_code_enum *code) { + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + if (code->index > 0) return -EINVAL; - code->code = MEDIA_BUS_FMT_SGRBG10_1X10; + code->code = mt9v032->format.code; return 0; } @@ -434,7 +436,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_frame_size_enum *fse) { - if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10) + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + + if (fse->index >= 3) + return -EINVAL; + if (mt9v032->format.code != fse->code) return -EINVAL; fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index); diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c index b7892f3efd988a45da8ebc18b9a2bd9222ed8157..5c4c3f0c57be1c45ea2f258abf2db4602a9bdb8f 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c @@ -14,8 +14,8 @@ #define MAX_SRC_WIDTH 2048 /* Reset & boot poll config */ -#define POLL_RST_MAX 50 -#define POLL_RST_DELAY_MS 20 +#define POLL_RST_MAX 500 +#define POLL_RST_DELAY_MS 2 enum bdisp_target_plan { BDISP_RGB, @@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp) for (i = 0; i < POLL_RST_MAX; i++) { if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE) break; - msleep(POLL_RST_DELAY_MS); + udelay(POLL_RST_DELAY_MS * 1000); } if (i == POLL_RST_MAX) dev_err(bdisp->dev, "Reset timeout\n"); diff --git a/drivers/media/platform/ti-vpe/cal.c b/drivers/media/platform/ti-vpe/cal.c index 42e383a48ffe896f4539684a15b7eacd853d4bc5..b6dcae1ecc1be1f60db30af7c7a07dbe6ed4b522 100644 --- a/drivers/media/platform/ti-vpe/cal.c +++ b/drivers/media/platform/ti-vpe/cal.c @@ -544,16 +544,16 @@ static void enable_irqs(struct cal_ctx *ctx) static void disable_irqs(struct cal_ctx *ctx) { + u32 val; + /* Disable IRQ_WDMA_END 0/1 */ - reg_write_field(ctx->dev, - CAL_HL_IRQENABLE_CLR(2), - CAL_HL_IRQ_CLEAR, - CAL_HL_IRQ_MASK(ctx->csi2_port)); + val = 0; + set_field(&val, CAL_HL_IRQ_CLEAR, CAL_HL_IRQ_MASK(ctx->csi2_port)); + reg_write(ctx->dev, CAL_HL_IRQENABLE_CLR(2), val); /* Disable IRQ_WDMA_START 0/1 */ - reg_write_field(ctx->dev, - CAL_HL_IRQENABLE_CLR(3), - CAL_HL_IRQ_CLEAR, - CAL_HL_IRQ_MASK(ctx->csi2_port)); + val = 0; + set_field(&val, CAL_HL_IRQ_CLEAR, CAL_HL_IRQ_MASK(ctx->csi2_port)); + reg_write(ctx->dev, CAL_HL_IRQENABLE_CLR(3), val); /* Todo: Add VC_IRQ and CSI2_COMPLEXIO_IRQ handling */ reg_write(ctx->dev, CAL_CSI2_VC_IRQENABLE(1), 0); }
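
The bdisp-hw hunk above keeps the overall reset budget near one second (50 x 20 ms becomes 500 x 2 ms) but polls far more often, and it switches from msleep() to udelay() because an msleep() of a couple of milliseconds can oversleep by much more than that. The generic poll-until-idle shape, sketched with hypothetical read_status()/delay_us() helpers standing in for readl() and the real register layout:

#include <stdbool.h>

#define POLL_MAX	500	/* attempts */
#define POLL_DELAY_US	2000	/* 2 ms between attempts, ~1 s total */

#define STATUS_IDLE	0x1	/* assumed idle bit for this sketch */

extern unsigned int read_status(void);	/* stands in for readl() */
extern void delay_us(unsigned int us);	/* stands in for udelay() */

/* Returns true when the block reports idle, false on timeout. */
static bool wait_for_idle(void)
{
	int i;

	for (i = 0; i < POLL_MAX; i++) {
		if (read_status() & STATUS_IDLE)
			return true;
		delay_us(POLL_DELAY_US);
	}
	return false;	/* caller logs "Reset timeout" */
}
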
diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index 427cda457af68e49e09cfaff4a927ddc80ab5292..5104678f29b76a14e96bfae1cf4bbf18141cb7c3 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -510,6 +510,9 @@ static int flexcop_usb_init(struct flexcop_usb *fc_usb) return ret; } + if (fc_usb->uintf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + switch (fc_usb->udev->speed) { case USB_SPEED_LOW: err("cannot handle USB speed because it is too slow."); @@ -543,9 +546,6 @@ static int flexcop_usb_probe(struct usb_interface *intf, struct flexcop_device *fc = NULL; int ret; - if (intf->cur_altsetting->desc.bNumEndpoints < 1) - return -ENODEV; - if ((fc = flexcop_device_kmalloc(sizeof(struct flexcop_usb))) == NULL) { err("out of memory\n"); return -ENOMEM; } diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 1ee7ec55829388e56df80680c7c04938fc0a4459..33dd54c8fa0410f3f0165efe87db72489aa516ba 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -821,7 +821,7 @@ int dib0700_rc_setup(struct dvb_usb_device *d, struct usb_interface *intf) /* Starting in firmware 1.20, the RC info is provided on a bulk pipe */ - if (intf->altsetting[0].desc.bNumEndpoints < rc_ep + 1) + if (intf->cur_altsetting->desc.bNumEndpoints < rc_ep + 1) return -ENODEV; purb = usb_alloc_urb(0, GFP_KERNEL); @@ -841,7 +841,7 @@ int dib0700_rc_setup(struct dvb_usb_device *d, struct usb_interface *intf) * Some devices like the Hauppauge NovaTD model 52009 use an interrupt * endpoint, while others use a bulk one. */ - e = &intf->altsetting[0].endpoint[rc_ep].desc; + e = &intf->cur_altsetting->endpoint[rc_ep].desc; if (usb_endpoint_dir_in(e)) { if (usb_endpoint_xfer_bulk(e)) { pipe = usb_rcvbulkpipe(d->udev, rc_ep); diff --git a/drivers/media/usb/gspca/ov519.c b/drivers/media/usb/gspca/ov519.c index 8106a47a0dd0e8831fb8871020de6a0b1c884611..b51d2de1aca86fb95d75498cfc72302d4e9fc6a7 100644 --- a/drivers/media/usb/gspca/ov519.c +++ b/drivers/media/usb/gspca/ov519.c @@ -3478,6 +3478,11 @@ static void ov511_mode_init_regs(struct sd *sd) return; } + if (alt->desc.bNumEndpoints < 1) { + sd->gspca_dev.usb_err = -ENODEV; + return; + } + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); reg_w(sd, R51x_FIFO_PSIZE, packet_size >> 5); @@ -3604,6 +3609,11 @@ static void ov518_mode_init_regs(struct sd *sd) return; } + if (alt->desc.bNumEndpoints < 1) { + sd->gspca_dev.usb_err = -ENODEV; + return; + } + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); ov518_reg_w32(sd, R51x_FIFO_PSIZE, packet_size & ~7, 2); diff --git a/drivers/media/usb/gspca/stv06xx/stv06xx.c b/drivers/media/usb/gspca/stv06xx/stv06xx.c index e72c3e1ab9ff41f0a39d685d4615a8bd3cb06914..9caa5ef9d9e08f1b2bbe9ec9390119355c6e2150 100644 --- a/drivers/media/usb/gspca/stv06xx/stv06xx.c +++ b/drivers/media/usb/gspca/stv06xx/stv06xx.c @@ -289,6 +289,9 @@ static int stv06xx_start(struct gspca_dev *gspca_dev) return -EIO; } + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); err = stv06xx_write_bridge(sd, STV_ISO_SIZE_L, packet_size); if (err < 0) @@ -313,11 +316,21 @@ static int stv06xx_start(struct gspca_dev *gspca_dev) static int stv06xx_isoc_init(struct gspca_dev *gspca_dev) { + struct usb_interface_cache *intfc; struct usb_host_interface *alt; struct sd *sd = (struct sd *) gspca_dev; + intfc =
gspca_dev->dev->actconfig->intf_cache[0]; + + if (intfc->num_altsetting < 2) + return -ENODEV; + + alt = &intfc->altsetting[1]; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + /* Start isoc bandwidth "negotiation" at max isoc bandwidth */ - alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; alt->endpoint[0].desc.wMaxPacketSize = cpu_to_le16(sd->sensor->max_packet_size[gspca_dev->curr_mode]); @@ -330,6 +343,10 @@ static int stv06xx_isoc_nego(struct gspca_dev *gspca_dev) struct usb_host_interface *alt; struct sd *sd = (struct sd *) gspca_dev; + /* + * Existence of altsetting and endpoint was verified in + * stv06xx_isoc_init() + */ alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); min_packet_size = sd->sensor->min_packet_size[gspca_dev->curr_mode]; diff --git a/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c b/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c index e1ce96e9405f5ed89a23c5c0a59a7b5290b5cd93..8d855b2756ba0647db08d4eb6b3ecf4f4b840bf8 100644 --- a/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c +++ b/drivers/media/usb/gspca/stv06xx/stv06xx_pb0100.c @@ -194,6 +194,10 @@ static int pb0100_start(struct sd *sd) alt = usb_altnum_to_altsetting(intf, sd->gspca_dev.alt); if (!alt) return -ENODEV; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); /* If we don't have enough bandwidth use a lower framerate */ diff --git a/drivers/media/usb/gspca/xirlink_cit.c b/drivers/media/usb/gspca/xirlink_cit.c index 68656e7986c752c982e65fea98d563cb12c7f03e..765a5d03e7cc96a9e570f6c7dad725773b3987dd 100644 --- a/drivers/media/usb/gspca/xirlink_cit.c +++ b/drivers/media/usb/gspca/xirlink_cit.c @@ -1451,6 +1451,9 @@ static int cit_get_packet_size(struct gspca_dev *gspca_dev) return -EIO; } + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + return le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); } @@ -2634,6 +2637,7 @@ static int sd_start(struct gspca_dev *gspca_dev) static int sd_isoc_init(struct gspca_dev *gspca_dev) { + struct usb_interface_cache *intfc; struct usb_host_interface *alt; int max_packet_size; @@ -2649,8 +2653,17 @@ static int sd_isoc_init(struct gspca_dev *gspca_dev) break; } + intfc = gspca_dev->dev->actconfig->intf_cache[0]; + + if (intfc->num_altsetting < 2) + return -ENODEV; + + alt = &intfc->altsetting[1]; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + /* Start isoc bandwidth "negotiation" at max isoc bandwidth */ - alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; alt->endpoint[0].desc.wMaxPacketSize = cpu_to_le16(max_packet_size); return 0; @@ -2673,6 +2686,9 @@ static int sd_isoc_nego(struct gspca_dev *gspca_dev) break; } + /* + * Existence of altsetting and endpoint was verified in sd_isoc_init() + */ alt = &gspca_dev->dev->actconfig->intf_cache[0]->altsetting[1]; packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); if (packet_size <= min_packet_size) diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c index 68df16b3ce72030c0b566dc31f93a58d51a040b8..50a61143898b5421874edc7bfb51c3fbce95382a 100644 --- a/drivers/media/usb/usbtv/usbtv-core.c +++ b/drivers/media/usb/usbtv/usbtv-core.c @@ -56,7 +56,7 @@ int usbtv_set_regs(struct usbtv *usbtv, const u16 regs[][2], int size) ret = usb_control_msg(usbtv->udev, pipe, USBTV_REQUEST_REG, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, index, NULL, 0, 0); + value, index, NULL, 0, 
USB_CTRL_GET_TIMEOUT); if (ret < 0) return ret; } diff --git a/drivers/media/usb/usbtv/usbtv-video.c b/drivers/media/usb/usbtv/usbtv-video.c index 3668a04359e8004bc0ea5c89c91def7c28a568f1..7c23d82313a84e8ea9a71791b4c564d8c2d21c7b 100644 --- a/drivers/media/usb/usbtv/usbtv-video.c +++ b/drivers/media/usb/usbtv/usbtv-video.c @@ -720,7 +720,8 @@ static int usbtv_s_ctrl(struct v4l2_ctrl *ctrl) ret = usb_control_msg(usbtv->udev, usb_rcvctrlpipe(usbtv->udev, 0), USBTV_CONTROL_REG, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, USBTV_BASE + 0x0244, (void *)data, 3, 0); + 0, USBTV_BASE + 0x0244, (void *)data, 3, + USB_CTRL_GET_TIMEOUT); if (ret < 0) goto error; } @@ -771,7 +772,7 @@ static int usbtv_s_ctrl(struct v4l2_ctrl *ctrl) ret = usb_control_msg(usbtv->udev, usb_sndctrlpipe(usbtv->udev, 0), USBTV_CONTROL_REG, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, index, (void *)data, size, 0); + 0, index, (void *)data, size, USB_CTRL_SET_TIMEOUT); error: if (ret < 0) diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 95d0f2df0ad42d4107c5b9ee282d0fa365f6b6c2..672831d5ee32e1c6ba0737cb99a2f80d0e867373 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -93,6 +93,11 @@ struct dln2_mod_rx_slots { spinlock_t lock; }; +enum dln2_endpoint { + DLN2_EP_OUT = 0, + DLN2_EP_IN = 1, +}; + struct dln2_dev { struct usb_device *usb_dev; struct usb_interface *interface; @@ -740,10 +745,10 @@ static int dln2_probe(struct usb_interface *interface, hostif->desc.bNumEndpoints < 2) return -ENODEV; - epin = &hostif->endpoint[0].desc; - epout = &hostif->endpoint[1].desc; + epout = &hostif->endpoint[DLN2_EP_OUT].desc; if (!usb_endpoint_is_bulk_out(epout)) return -ENODEV; + epin = &hostif->endpoint[DLN2_EP_IN].desc; if (!usb_endpoint_is_bulk_in(epin)) return -ENODEV; diff --git a/drivers/mfd/rts5227.c b/drivers/mfd/rts5227.c index ff296a4bf3d235a2bd860989b4464be58a79341d..dc6a9432a4b652dae27277474aab71463147642c 100644 --- a/drivers/mfd/rts5227.c +++ b/drivers/mfd/rts5227.c @@ -369,6 +369,7 @@ static const struct pcr_ops rts522a_pcr_ops = { void rts522a_init_params(struct rtsx_pcr *pcr) { rts5227_init_params(pcr); + pcr->ops = &rts522a_pcr_ops; pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; } diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index 494e263daa748d30b9a284be67c0fd3c518eebc3..b7ee8043a133ec4d06348284685ba44aa6de2d79 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -2126,8 +2126,8 @@ static int altera_execute(struct altera_state *astate, return status; } -static int altera_get_note(u8 *p, s32 program_size, - s32 *offset, char *key, char *value, int length) +static int altera_get_note(u8 *p, s32 program_size, s32 *offset, + char *key, char *value, int keylen, int vallen) /* * Gets key and value of NOTE fields in the JBC file. 
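* The key and value output buffers have separate size limits (keylen and vallen), so the strlcpy() copies below cannot overrun a key buffer that is smaller than the value buffer.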
* Can be called in two modes: if offset pointer is NULL, @@ -2184,7 +2184,7 @@ static int altera_get_note(u8 *p, s32 program_size, &p[note_table + (8 * i) + 4])]; if (value != NULL) - strlcpy(value, value_ptr, length); + strlcpy(value, value_ptr, vallen); } } @@ -2203,13 +2203,13 @@ static int altera_get_note(u8 *p, s32 program_size, strlcpy(key, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i)])], - length); + keylen); if (value != NULL) strlcpy(value, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i) + 4])], - length); + vallen); *offset = i + 1; } @@ -2463,7 +2463,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw) __func__, (format_version == 2) ? "Jam STAPL" : "pre-standardized Jam 1.1"); while (altera_get_note((u8 *)fw->data, fw->size, - &offset, key, value, 256) == 0) + &offset, key, value, 32, 256) == 0) printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n", __func__, key, value); } diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c index 9597e9523cac4d7459e1869f69a49f1c80a73b24..fff13176f9b8b1dddb6f9464622d9803d23ca1da 100644 --- a/drivers/misc/echo/echo.c +++ b/drivers/misc/echo/echo.c @@ -454,7 +454,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) */ ec->factor = 0; ec->shift = 0; - if ((ec->nonupdate_dwell == 0)) { + if (!ec->nonupdate_dwell) { int p, logp, shift; /* Determine: diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 230f1e8538dc9c16a810487eb1161ae067c05d8d..953af49dd38a531f7b9f8b45d734f2ab07b6fd74 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -466,7 +466,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, int err; int irq = 0; int id; - char name[20]; + char name[24]; enum pci_barno bar; void __iomem *base; struct device *dev = &pdev->dev; diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index d04faf312ed64d5f040e8b7f52ef7de7d1c93158..c82424ef3c7fc447674ba5e598a900b53ebe5748 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -126,7 +126,7 @@ static void get_full_task_comm(struct task_entry *task_entry, int i = 0, offset = 0, len = 0; /* save one byte for terminating null character */ int unused_len = MAX_TASK_COMM_LEN - TASK_COMM_LEN - 1; - char buf[unused_len]; + char buf[MAX_TASK_COMM_LEN - TASK_COMM_LEN - 1]; struct mm_struct *mm = task->mm; /* fill the first TASK_COMM_LEN bytes with thread name */ diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 1dadd460cc8fb3ed1af8f65a1172858504c97552..78c9ac33b562166c1731bec085ee49b83d1a80b6 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -125,7 +125,8 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) { sdhci_reset(host, mask); - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); } @@ -404,8 +405,11 @@ static int sdhci_at91_probe(struct platform_device *pdev) * detection procedure using the SDMCC_CD signal is bypassed. * This bit is reset when a software reset for all command is performed * so we need to implement our own reset function to set back this bit. + * + * WA: SAMA5D2 doesn't drive CMD if using CD GPIO line. 
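+ * Apply the same forced card-detect setting whenever a CD GPIO is in use (mmc_gpio_get_cd() >= 0), since the controller's own card-detect line is bypassed in that case as well.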
*/ - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); pm_runtime_put_autosuspend(&pdev->dev); diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 0e386f5cc83650c2b164e40027934c5b63800ca7..4bc89551229b8afde3471aca76a96aa07d78e3b7 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -490,6 +490,9 @@ static int intel_select_drive_strength(struct mmc_card *card, struct sdhci_pci_slot *slot = sdhci_priv(host); struct intel_host *intel_host = sdhci_pci_priv(slot); + if (!(mmc_driver_type_mask(intel_host->drv_strength) & card_drv)) + return 0; + return intel_host->drv_strength; } diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index a0b5089b3274821b6e84f16e57ffebd08a184cae..fafb02644efde7726ff663d15834477e0b5e4a28 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -238,6 +238,16 @@ static void xenon_voltage_switch(struct sdhci_host *host) { /* Wait for 5ms after set 1.8V signal enable bit */ usleep_range(5000, 5500); + + /* + * For some reason the controller's Host Control2 register reports + * the bit representing 1.8V signaling as 0 when read after it was + * written as 1. Subsequent read reports 1. + * + * Since this may cause some issues, do an empty read of the Host + * Control2 register here to circumvent this. + */ + sdhci_readw(host, SDHCI_HOST_CONTROL2); } static const struct sdhci_ops sdhci_xenon_ops = { diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index e773dc6fdd3c67106ecaea95c0b7969ed9a66f1e..1f0d83086cb092ed1b050a4ea989f4f755cbd30e 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -1883,7 +1883,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, continue; } - if (time_after(jiffies, timeo) && !chip_ready(map, adr)) + /* + * We check "time_after" and "!chip_good" before checking "chip_good" to avoid + * the failure due to scheduling. 
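+ * Otherwise a thread scheduled out past the timeout could report a failure even though the buffered write had actually completed.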
+ */ + if (time_after(jiffies, timeo) && !chip_good(map, adr, datum)) break; if (chip_good(map, adr, datum)) { diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index 7287696a21f91df6a875dc32a112e6206c855de1..d312c1b5a78e1ad0bfd0ae91971b315fb88fc7fa 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -247,22 +247,25 @@ static int phram_setup(const char *val) ret = parse_num64(&start, token[1]); if (ret) { - kfree(name); parse_err("illegal start address\n"); + goto error; } ret = parse_num64(&len, token[2]); if (ret) { - kfree(name); parse_err("illegal device length\n"); + goto error; } ret = register_device(name, start, len); - if (!ret) - pr_info("%s device: %#llx at %#llx\n", name, len, start); - else - kfree(name); + if (ret) + goto error; + + pr_info("%s device: %#llx at %#llx\n", name, len, start); + return 0; +error: + kfree(name); return ret; } diff --git a/drivers/mtd/lpddr/lpddr_cmds.c b/drivers/mtd/lpddr/lpddr_cmds.c index 018c75faadb3d30c2de119608536b1335d175fa4..e1c283ccbbded16fae4a9ba4d2fd50855fbb79e2 100644 --- a/drivers/mtd/lpddr/lpddr_cmds.c +++ b/drivers/mtd/lpddr/lpddr_cmds.c @@ -81,7 +81,6 @@ struct mtd_info *lpddr_cmdset(struct map_info *map) shared = kmalloc(sizeof(struct flchip_shared) * lpddr->numchips, GFP_KERNEL); if (!shared) { - kfree(lpddr); kfree(mtd); return NULL; } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 755d588bbcb1be05880e201f2350606904e58b02..0b79ddec15b73bbb32a3f4983c9aa114a44c9903 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -71,11 +71,6 @@ struct arp_pkt { }; #pragma pack() -static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) -{ - return (struct arp_pkt *)skb_network_header(skb); -} - /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match); @@ -574,10 +569,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) spin_unlock(&bond->mode_lock); } -static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = arp_pkt(skb); struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; @@ -674,8 +670,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). @@ -685,7 +685,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected rx channel */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); if (tx_slave) bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, tx_slave->dev->addr_len); @@ -696,7 +696,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) * When the arp reply is received the entry will be updated * with the correct unicast address of the client. 
*/ - rlb_choose_channel(skb, bond); + rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index d92113db4fb97e146388bdcccd60f5898ddc278b..05ad5ed145a3a98efb54b0c1176ec545a10501a9 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -867,6 +867,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, }; static int can_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 35564a9561b7828be777e5b288afb77f88e95ce8..c5a616395c49036d168abd8b9d94c982e53d7744 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -147,7 +147,7 @@ static void slc_bump(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; - cf.can_id = 0; + memset(&cf, 0, sizeof(cf)); switch (*cmd) { case 'r': @@ -186,8 +186,6 @@ static void slc_bump(struct slcan *sl) else return; - *(u64 *) (&cf.data) = 0; /* clear payload */ - /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf.can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf.can_dlc; i++) { @@ -621,7 +619,10 @@ static int slcan_open(struct tty_struct *tty) tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); slc_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 434e6dced6b7f982b257b042877b358d9e4e0af1..274d369151107a464b5c8ad5a5bd8e31b3545af8 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1094,6 +1094,7 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, u16 vid, struct b53_arl_entry *ent, u8 *idx, bool is_valid) { + DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES); unsigned int i; int ret; @@ -1101,6 +1102,8 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, if (ret) return ret; + bitmap_zero(free_bins, dev->num_arl_entries); + /* Read the bins */ for (i = 0; i < dev->num_arl_entries; i++) { u64 mac_vid; @@ -1112,13 +1115,21 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, B53_ARLTBL_DATA_ENTRY(i), &fwd_entry); b53_arl_to_entry(ent, mac_vid, fwd_entry); - if (!(fwd_entry & ARLTBL_VALID)) + if (!(fwd_entry & ARLTBL_VALID)) { + set_bit(i, free_bins); continue; + } if ((mac_vid & ARLTBL_MAC_MASK) != mac) continue; *idx = i; + return 0; } + if (bitmap_weight(free_bins, dev->num_arl_entries) == 0) + return -ENOSPC; + + *idx = find_first_bit(free_bins, dev->num_arl_entries); + return -ENOENT; } @@ -1148,10 +1159,21 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, if (op) return ret; - /* We could not find a matching MAC, so reset to a new entry */ - if (ret) { + switch (ret) { + case -ENOSPC: + dev_dbg(dev->dev, "{%pM,%.4d} no space left in ARL\n", + addr, vid); + return is_valid ? 
ret : 0; + case -ENOENT: + /* We could not find a matching MAC, so reset to a new entry */ + dev_dbg(dev->dev, "{%pM,%.4d} not found, using idx: %d\n", + addr, vid, idx); fwd_entry = 0; - idx = 1; + break; + default: + dev_dbg(dev->dev, "{%pM,%.4d} found, using idx: %d\n", + addr, vid, idx); + break; } memset(&ent, 0, sizeof(ent)); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index e5c86d44667af1fd9f68a550f169a322989f39bd..247aef92b7594ac5bf48b3b552ddf69bd254d708 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -294,7 +294,7 @@ * * BCM5325 and BCM5365 share most definitions below */ -#define B53_ARLTBL_MAC_VID_ENTRY(n) (0x10 * (n)) +#define B53_ARLTBL_MAC_VID_ENTRY(n) ((0x10 * (n)) + 0x10) #define ARLTBL_MAC_MASK 0xffffffffffffULL #define ARLTBL_VID_S 48 #define ARLTBL_VID_MASK_25 0xff @@ -306,13 +306,16 @@ #define ARLTBL_VALID_25 BIT(63) /* ARL Table Data Entry N Registers (32 bit) */ -#define B53_ARLTBL_DATA_ENTRY(n) ((0x10 * (n)) + 0x08) +#define B53_ARLTBL_DATA_ENTRY(n) ((0x10 * (n)) + 0x18) #define ARLTBL_DATA_PORT_ID_MASK 0x1ff #define ARLTBL_TC(tc) ((3 & tc) << 11) #define ARLTBL_AGE BIT(14) #define ARLTBL_STATIC BIT(15) #define ARLTBL_VALID BIT(16) +/* Maximum number of bin entries in the ARL for all switches */ +#define B53_ARLTBL_MAX_BIN_ENTRIES 4 + /* ARL Search Control Register (8 bit) */ #define B53_ARL_SRCH_CTL 0x50 #define B53_ARL_SRCH_CTL_25 0x20 diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 747062f04bb5e3aa21dee545c6ab8b61f8e98fa9..b40ebc27e1ece2f25c86459f6343a43e11973cdb 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -138,8 +138,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); reg |= (MII_SW_OR | LINK_STS); - if (priv->type == BCM7278_DEVICE_ID) - reg |= GMII_SPEED_UP_2G; + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ @@ -1113,6 +1112,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) const struct bcm_sf2_of_data *data; struct b53_platform_data *pdata; struct dsa_switch_ops *ops; + struct device_node *ports; struct bcm_sf2_priv *priv; struct b53_device *dev; struct dsa_switch *ds; @@ -1175,7 +1175,11 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) */ set_bit(0, priv->cfp.used); - bcm_sf2_identify_ports(priv, dn->child); + ports = of_find_node_by_name(dn, "ports"); + if (ports) { + bcm_sf2_identify_ports(priv, ports); + of_node_put(ports); + } priv->irq0 = irq_of_parse_and_map(dn, 0); priv->irq1 = irq_of_parse_and_map(dn, 1); diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 7f8d269dd75ad347fd616c0230c1adb452d9bac5..814618c0b6328d58cfbcae2035967ba56c657ef4 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -130,17 +130,14 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, (fs->m_ext.vlan_etype || fs->m_ext.data[1])) return -EINVAL; - if (fs->location != RX_CLS_LOC_ANY && fs->location >= CFP_NUM_RULES) + if (fs->location != RX_CLS_LOC_ANY && + fs->location > bcm_sf2_cfp_rule_size(priv)) return -EINVAL; if (fs->location != RX_CLS_LOC_ANY && test_bit(fs->location, priv->cfp.used)) return -EBUSY; - if (fs->location != RX_CLS_LOC_ANY && - fs->location > bcm_sf2_cfp_rule_size(priv)) - return -EINVAL; - ip_frag = be32_to_cpu(fs->m_ext.data[0]); /* We do not support discarding packets, check that 
the @@ -333,7 +330,7 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, int ret; u32 reg; - if (loc >= CFP_NUM_RULES) + if (loc > bcm_sf2_cfp_rule_size(priv)) return -EINVAL; /* Refuse deletion of unused rules, and the default reserved rule */ diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 3b073e15223731c82c4a4a31d9638a1e5c86342e..58c16aa00a705ce981f4b56e4a8a7b613df2c992 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -549,7 +549,7 @@ mt7530_mib_reset(struct dsa_switch *ds) static void mt7530_port_set_status(struct mt7530_priv *priv, int port, int enable) { - u32 mask = PMCR_TX_EN | PMCR_RX_EN; + u32 mask = PMCR_TX_EN | PMCR_RX_EN | PMCR_FORCE_LNK; if (enable) mt7530_set(priv, MT7530_PMCR_P(port), mask); diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 10e6053f667123f2473fbdee232025515a8be88d..dc9149a32f412a0be72cc22afdf3edf1b01ef694 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -199,6 +199,11 @@ static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, u16 command_id, bool capture) { + if (unlikely(!queue->comp_ctx)) { + pr_err("Completion context is NULL\n"); + return NULL; + } + if (unlikely(command_id >= queue->q_depth)) { pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", command_id, queue->q_depth); @@ -843,6 +848,24 @@ static int ena_com_get_feature(struct ena_com_dev *ena_dev, 0); } +static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) +{ + struct ena_admin_feature_rss_flow_hash_control *hash_key = + (ena_dev->rss).hash_key; + + netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key)); + /* The key is stored in the device in u32 array + * as well as the API requires the key to be passed in this + * format. 
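+ * (sizeof(hash_key->key) is in bytes, while keys_num counts u32 words.)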
Thus the size of our array should be divided by 4 + */ + hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); +} + +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev) +{ + return ena_dev->rss.hash_func; +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; @@ -2069,15 +2092,16 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, switch (func) { case ENA_ADMIN_TOEPLITZ: - if (key_len > sizeof(hash_key->key)) { - pr_err("key len (%hu) is bigger than the max supported (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; + if (key) { + if (key_len != sizeof(hash_key->key)) { + pr_err("key len (%hu) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; } - - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->keys_num = key_len >> 2; break; case ENA_ADMIN_CRC32: rss->hash_init_val = init_val; @@ -2114,7 +2138,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, if (unlikely(rc)) return rc; - rss->hash_func = get_resp.u.flow_hash_func.selected_func; + /* ffs() returns 1 in case the lsb is set */ + rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func); + if (rss->hash_func) + rss->hash_func--; + if (func) *func = rss->hash_func; @@ -2402,6 +2430,8 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) if (unlikely(rc)) goto err_hash_key; + ena_com_hash_key_fill_default_key(ena_dev); + rc = ena_com_hash_ctrl_init(ena_dev); if (unlikely(rc)) goto err_hash_ctrl; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 7b784f8a06a6608c37f2c458b8172961ceed08f3..7272fb0d858d0af11222d569890608d1356cb39d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "ena_common_defs.h" #include "ena_admin_defs.h" @@ -631,6 +632,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); */ void ena_com_rss_destroy(struct ena_com_dev *ena_dev); +/* ena_com_get_current_hash_function - Get RSS hash function + * @ena_dev: ENA communication layer struct + * + * Return the current hash function. + * @return: 0 or one of the ena_admin_hash_functions values. + */ +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev); + /* ena_com_fill_hash_function - Fill RSS hash function * @ena_dev: ENA communication layer struct * @func: The hash function (Toeplitz or crc) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index a2f02c23fe141014556a9cb5d24cd4bd856f8f8e..d29e256bf6104fd5fce5bef769eb60299d1dfc4d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -648,6 +648,28 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev) return ENA_HASH_KEY_SIZE; } +static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int i, rc; + + if (!indir) + return 0; + + rc = ena_com_indirect_table_get(ena_dev, indir); + if (rc) + return rc; + + /* Our internal representation of the indices is: even indices + * for Tx and uneven indices for Rx. 
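+ * e.g. combined queue 1 uses TXQ index 2 and RXQ index 3; ENA_IO_RXQ_IDX_TO_COMBINED_IDX(3) yields 1.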
We need to convert the Rx + * indices to be consecutive + */ + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) + indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]); + + return rc; +} + static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -656,11 +678,25 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 func; int rc; - rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + rc = ena_indirection_table_get(adapter, indir); if (rc) return rc; + /* We call this function in order to check if the device + * supports getting/setting the hash function. + */ rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + + if (rc) { + if (rc == -EOPNOTSUPP) { + key = NULL; + hfunc = NULL; + rc = 0; + } + + return rc; + } + if (rc) return rc; @@ -669,7 +705,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, func = ETH_RSS_HASH_TOP; break; case ENA_ADMIN_CRC32: - func = ETH_RSS_HASH_XOR; + func = ETH_RSS_HASH_CRC32; break; default: netif_err(adapter, drv, netdev, @@ -712,10 +748,13 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, } switch (hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + func = ena_com_get_current_hash_function(ena_dev); + break; case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; - case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_CRC32: func = ENA_ADMIN_CRC32; break; default: @@ -816,6 +855,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_channels = ena_get_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, + .get_ts_info = ethtool_op_get_ts_info, }; void ena_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 518ff393a026b7f86be9a27ae98396ffa8e6bab4..d9ece9ac6f53c68d10f24f4321db58cc1642d72f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2803,8 +2803,8 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) return; - keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + - adapter->keep_alive_timeout); + keep_alive_expired = adapter->last_keep_alive_jiffies + + adapter->keep_alive_timeout; if (unlikely(time_is_before_jiffies(keep_alive_expired))) { netif_err(adapter, drv, adapter->netdev, "Keep alive watchdog timeout.\n"); @@ -2906,7 +2906,7 @@ static void ena_timer_service(unsigned long data) } /* Reset the timer */ - mod_timer(&adapter->timer_service, jiffies + HZ); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); } static int ena_calc_io_queue_num(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 3404376c28ca3805564667488152bbc7da9c6329..5a72267b858b16d980f49b9cfca136cea2f363c0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -113,6 +113,8 @@ #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) +#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2) +#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index c65d2cdcc7cfb312eee6467f390f56f5a7540003..8556962e682442f676b1cdf1062dda0f9737cfee 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c 
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -515,7 +515,7 @@ static void xgbe_isr_task(unsigned long data) xgbe_disable_rx_tx_ints(pdata); /* Turn on polling */ - __napi_schedule_irqoff(&pdata->napi); + __napi_schedule(&pdata->napi); } } else { /* Don't clear Rx/Tx status if doing per channel DMA diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 50dd6bf176d034721590bf15c2abfab88dd5285a..3a489b2b99c99fe0fb644f74575c71be0c8584f0 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2034,7 +2034,7 @@ static int xgene_enet_probe(struct platform_device *pdev) int ret; ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), - XGENE_NUM_RX_RING, XGENE_NUM_TX_RING); + XGENE_NUM_TX_RING, XGENE_NUM_RX_RING); if (!ndev) return -ENOMEM; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index a69f5f1ad32a8e0c8c9195ef60266332af344acb..7a900f76c9ac3615b4f43f538f88fae0d4d8a492 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -519,8 +519,10 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f48f7d104af2183c83d5712ebb4470a96631f326..123ee5c11bc0c9a0371b935f5e6e60338e0a194d 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -645,7 +645,8 @@ static struct sk_buff *bcm_sysport_rx_refill(struct bcm_sysport_priv *priv, dma_addr_t mapping; /* Allocate a new SKB for a new packet */ - skb = netdev_alloc_skb(priv->netdev, RX_BUF_LENGTH); + skb = __netdev_alloc_skb(priv->netdev, RX_BUF_LENGTH, + GFP_ATOMIC | __GFP_NOWARN); if (!skb) { priv->mib.alloc_rx_buff_failed++; netif_err(priv, rx_err, ndev, "SKB alloc failed\n"); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 41bc7820d2dde092a4cc1c218e320de9399b6639..5163da01e54f8d0688cc77e38ac2c23135c24680 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7310,13 +7310,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) struct bnxt *bp = netdev_priv(dev); if (netif_running(dev)) - bnxt_close_nic(bp, false, false); + bnxt_close_nic(bp, true, false); dev->mtu = new_mtu; bnxt_set_ring_params(bp); if (netif_running(dev)) - return bnxt_open_nic(bp, false, false); + return bnxt_open_nic(bp, true, false); return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index fed37cd9ae1d464af02335c072b4b3676b024e0b..125e22ffe2ae86f0c23ea25cde377fe3b823aff8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -387,24 +387,26 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) { struct bnxt *bp = netdev_priv(dev); struct ieee_ets *my_ets = bp->ieee_ets; + int rc; ets->ets_cap = bp->max_tc; if (!my_ets) { - int rc; - if (bp->dcbx_cap & DCB_CAP_DCBX_HOST) return 0; my_ets = kzalloc(sizeof(*my_ets), GFP_KERNEL); if (!my_ets) - return 
0; + return -ENOMEM; rc = bnxt_hwrm_queue_cos2bw_qcfg(bp, my_ets); if (rc) - return 0; + goto error; rc = bnxt_hwrm_queue_pri2cos_qcfg(bp, my_ets); if (rc) - return 0; + goto error; + + /* cache result */ + bp->ieee_ets = my_ets; } ets->cbs = my_ets->cbs; @@ -413,6 +415,9 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa)); memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc)); return 0; +error: + kfree(my_ets); + return rc; } static int bnxt_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 3e3044fe320665123450ab60809acbdc6db95e53..38391230ca8604fded464552868d3b8ca44f470b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -973,6 +973,8 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, if (netif_running(dev)) bcmgenet_update_mib_counters(priv); + dev->netdev_ops->ndo_get_stats(dev); + for (i = 0; i < BCMGENET_STATS_LEN; i++) { const struct bcmgenet_stats *s; char *p; @@ -1672,7 +1674,8 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, dma_addr_t mapping; /* Allocate a new Rx skb */ - skb = netdev_alloc_skb(priv->dev, priv->rx_buf_len + SKB_ALIGNMENT); + skb = __netdev_alloc_skb(priv->dev, priv->rx_buf_len + SKB_ALIGNMENT, + GFP_ATOMIC | __GFP_NOWARN); if (!skb) { priv->mib.alloc_rx_buff_failed++; netif_err(priv, rx_err, priv->dev, @@ -3215,6 +3218,7 @@ static struct net_device_stats *bcmgenet_get_stats(struct net_device *dev) dev->stats.rx_packets = rx_packets; dev->stats.rx_errors = rx_errors; dev->stats.rx_missed_errors = rx_errors; + dev->stats.rx_dropped = rx_dropped; return &dev->stats; } diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 586e355933108884db4e01bb950801a6740814e0..d678f088925c6d4d8fc669fb5ebca4eb1585ba8f 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -234,10 +234,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - if (enable) + if (enable) { cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; - else + + /* enable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, + GMI_TXX_INT_UNDFLW); + } else { cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Disable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, + GMI_TXX_INT_UNDFLW); + } bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); if (bgx->is_rgx) @@ -1340,6 +1349,48 @@ static int bgx_init_phy(struct bgx *bgx) return bgx_init_of_phy(bgx); } +static irqreturn_t bgx_intr_handler(int irq, void *data) +{ + struct bgx *bgx = (struct bgx *)data; + u64 status, val; + int lmac; + + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); + if (status & GMI_TXX_INT_UNDFLW) { + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", + bgx->bgx_id, lmac); + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); + val &= ~CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + val |= CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + } + /* clear interrupts */ + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); + } + + return IRQ_HANDLED; +} + +static void 
bgx_register_intr(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET, + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + pci_err(pdev, "Req for #%d msix vectors failed\n", + BGX_LMAC_VEC_OFFSET); + return; + } + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, + bgx, "BGX%d", bgx->bgx_id); + if (ret) + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); +} + static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int err; @@ -1355,7 +1406,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, bgx); - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); pci_set_drvdata(pdev, NULL); @@ -1409,6 +1460,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_init_hw(bgx); + bgx_register_intr(pdev); + /* Enable all LMACs */ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { err = bgx_lmac_enable(bgx, lmac); @@ -1425,6 +1478,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -1442,6 +1496,8 @@ static void bgx_remove(struct pci_dev *pdev) for (lmac = 0; lmac < bgx->lmac_count; lmac++) bgx_lmac_disable(bgx, lmac); + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); + bgx_vnic[bgx->bgx_id] = NULL; pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 23acdc5ab896306f3f1e6d2fbc7d8afb58273888..adaa3bfa5f6cbbe8ac2af9c86820bcca23c3794e 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -179,6 +179,15 @@ #define BGX_GMP_GMI_TXX_BURST 0x38228 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 +#define BGX_GMP_GMI_TXX_INT 0x38500 +#define BGX_GMP_GMI_TXX_INT_W1S 0x38508 +#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510 +#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 +#define GMI_TXX_INT_PTP_LOST BIT_ULL(4) +#define GMI_TXX_INT_LATE_COL BIT_ULL(3) +#define GMI_TXX_INT_XSDEF BIT_ULL(2) +#define GMI_TXX_INT_XSCOL BIT_ULL(1) +#define GMI_TXX_INT_UNDFLW BIT_ULL(0) #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ #define BGX_MSIX_VEC_0_29_CTL 0x400008 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index 9f9d6cae39d555057ba63ac6ee874d0884c0c51d..ff7e58a8c90fb59c14472ecaf85ecae5c9febd36 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -246,6 +246,9 @@ static int cxgb4_ptp_fineadjtime(struct adapter *adapter, s64 delta) FW_PTP_CMD_PORTID_V(0)); c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); c.u.ts.sc = FW_PTP_SC_ADJ_FTIME; + c.u.ts.sign = (delta < 0) ? 
1 : 0; + if (delta < 0) + delta = -delta; c.u.ts.tm = cpu_to_be64(delta); err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), NULL); @@ -308,32 +311,17 @@ static int cxgb4_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) */ static int cxgb4_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { - struct adapter *adapter = (struct adapter *)container_of(ptp, - struct adapter, ptp_clock_info); - struct fw_ptp_cmd c; + struct adapter *adapter = container_of(ptp, struct adapter, + ptp_clock_info); u64 ns; - int err; - memset(&c, 0, sizeof(c)); - c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) | - FW_CMD_REQUEST_F | - FW_CMD_READ_F | - FW_PTP_CMD_PORTID_V(0)); - c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); - c.u.ts.sc = FW_PTP_SC_GET_TIME; - - err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), &c); - if (err < 0) { - dev_err(adapter->pdev_dev, - "PTP: %s error %d\n", __func__, -err); - return err; - } + ns = t4_read_reg(adapter, T5_PORT_REG(0, MAC_PORT_PTP_SUM_LO_A)); + ns |= (u64)t4_read_reg(adapter, + T5_PORT_REG(0, MAC_PORT_PTP_SUM_HI_A)) << 32; /* convert to timespec*/ - ns = be64_to_cpu(c.u.ts.tm); *ts = ns_to_timespec64(ns); - - return err; + return 0; } /** diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 39bcf27902e4b286f981c89a1c26e7709c6dcf2b..0f126ce4645f30c6f51130271dcff33ed400f33a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3609,7 +3609,7 @@ int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver) FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_PHYFW_VERSION)); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val); - if (ret < 0) + if (ret) return ret; *phy_fw_ver = val; return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index dac90837842bf1b20e93f8d0810dada4e5c34292..d3df6962cf433d8ad9a0ded83fea9677f2f72c0e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1810,6 +1810,9 @@ #define MAC_PORT_CFG2_A 0x818 +#define MAC_PORT_PTP_SUM_LO_A 0x990 +#define MAC_PORT_PTP_SUM_HI_A 0x994 + #define MPS_CMN_CTL_A 0x9000 #define COUNTPAUSEMCRX_S 5 diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 19f374b180fc1ca0a8f537421e5d39fa76001475..52a3b32390a9c821e3fb047cc4c1dc1ec7eae385 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1972,10 +1972,10 @@ static int enic_stop(struct net_device *netdev) napi_disable(&enic->napi[i]); netif_carrier_off(netdev); - netif_tx_disable(netdev); if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 39b8b6730e77cc448e5a9f38fdb2b9d3e77ba266..67246d42c3d9feecec0f08b9022f20611639b797 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2646,9 +2646,7 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl) headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE + DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE); - return DPAA_FD_DATA_ALIGNMENT ? 
ALIGN(headroom, + DPAA_FD_DATA_ALIGNMENT) : + headroom; + return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT); } static int dpaa_eth_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 62bc19bedb0692108d6e1a9affae6d40121523c6..8ba915cc4c2e4e40de46b5ec432541d1379dfc18 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2478,15 +2478,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs); if (cycle > 0xFFFF) { pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + pr_err("Tx coalesced usec exceed hardware limitation\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 8870a9a798ca4e0245e6b05ac8d4ee2cf8499b46..91437b94bfcb6a65a8fc8f499be23e8c4c77727a 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -8,3 +8,31 @@ config FSL_FMAN help Freescale Data-Path Acceleration Architecture Frame Manager (FMan) support + +config DPAA_ERRATUM_A050385 + bool + depends on ARM64 && FSL_DPAA + default y + help + DPAA FMan erratum A050385 software workaround implementation: + align buffers, data start and SG fragment length to avoid FMan DMA + splits. + FMAN DMA reads or writes under heavy traffic load may cause an FMAN + internal resource leak, thus stopping further packet processing. + The FMAN internal queue can overflow when FMAN splits single + read or write transactions into multiple smaller transactions + such that more than 17 AXI transactions are in flight from FMAN + to interconnect. When the FMAN internal queue overflows, it can + stall further packet processing. The issue can occur with any + one of the following three conditions: + 1. FMAN AXI transaction crosses 4K address boundary (Errata + A010022) + 2. FMAN DMA address for an AXI transaction is not 16 byte + aligned, i.e. the last 4 bits of an address are non-zero + 3. Scatter Gather (SG) frames have more than one SG buffer in + the SG list and any one of the buffers, except the last + buffer in the SG list, has a data size that is not a multiple + of 16 bytes, i.e., other than 16, 32, 48, 64, etc. + With any one of the above three conditions present, there is a + likelihood of stalled FMAN packet processing, especially under + stress with multiple ports injecting line-rate traffic. diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 97425d94e280d6d12431085e53a0eebed5f12a8e..9080d2332d030d133a656cf702fad5918afcea46 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc.
+ * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -566,6 +567,10 @@ struct fman_cfg { u32 qmi_def_tnums_thresh; }; +#ifdef CONFIG_DPAA_ERRATUM_A050385 +static bool fman_has_err_a050385; +#endif + static irqreturn_t fman_exceptions(struct fman *fman, enum fman_exceptions exception) { @@ -2517,6 +2522,14 @@ struct fman *fman_bind(struct device *fm_dev) } EXPORT_SYMBOL(fman_bind); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void) +{ + return fman_has_err_a050385; +} +EXPORT_SYMBOL(fman_has_errata_a050385); +#endif + static irqreturn_t fman_err_irq(int irq, void *handle) { struct fman *fman = (struct fman *)handle; @@ -2843,6 +2856,11 @@ static struct fman *read_dts_node(struct platform_device *of_dev) goto fman_free; } +#ifdef CONFIG_DPAA_ERRATUM_A050385 + fman_has_err_a050385 = + of_property_read_bool(fm_node, "fsl,erratum-a050385"); +#endif + return fman; fman_node_put: diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h index bfa02e0014ae01f4a08a600e0ba27a856c0371f6..693401994fa2dd4d8d49462a28304d043c2d3ce4 100644 --- a/drivers/net/ethernet/freescale/fman/fman.h +++ b/drivers/net/ethernet/freescale/fman/fman.h @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc. + * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -397,6 +398,10 @@ u16 fman_get_max_frm(void); int fman_get_rx_extra_headroom(void); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void); +#endif + struct fman *fman_bind(struct device *dev); #endif /* __FM_H */ diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 27d0e3b9833cd6dbd7e9262d4f3474b862068aaf..e4a2c74a9b47e74021b94a3ce6b1e7416675d20e 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2685,13 +2685,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { + bool do_tstamp; + + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en; frags = skb_shinfo(skb)->nr_frags; /* When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. 
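* do_tstamp is evaluated once above from SKBTX_HW_TSTAMP and priv->hwts_tx_en, so the three timestamp checks below stay consistent for the whole cleanup of this descriptor chain.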
*/ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + if (unlikely(do_tstamp)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2705,7 +2709,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { next = next_txbd(bdp, base, tx_ring_size); buflen = be16_to_cpu(next->length) + GMAC_FCB_LEN + GMAC_TXPAL_LEN; @@ -2715,7 +2719,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), buflen, DMA_TO_DEVICE); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) & ~0x7UL); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c index 7d95f0866fb0bdc66ef5fb3b8f8ab14c79232ad3..e1de97effcd240694ad17c1348654db632d612b1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -398,7 +398,8 @@ static int cmdq_sync_cmd_direct_resp(struct hinic_cmdq *cmdq, spin_unlock_bh(&cmdq->cmdq_lock); - if (!wait_for_completion_timeout(&done, CMDQ_TIMEOUT)) { + if (!wait_for_completion_timeout(&done, + msecs_to_jiffies(CMDQ_TIMEOUT))) { spin_lock_bh(&cmdq->cmdq_lock); if (cmdq->errcode[curr_prod_idx] == &errcode) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 79b56744708427eb741a1fc8f964f28beb3f0961..5763e333a9afe58ccc0c851b049569cbd5fd1985 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -312,6 +312,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, } hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif); + hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif); hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; @@ -372,50 +373,6 @@ static int wait_for_db_state(struct hinic_hwdev *hwdev) return -EFAULT; } -static int wait_for_io_stopped(struct hinic_hwdev *hwdev) -{ - struct hinic_cmd_io_status cmd_io_status; - struct hinic_hwif *hwif = hwdev->hwif; - struct pci_dev *pdev = hwif->pdev; - struct hinic_pfhwdev *pfhwdev; - unsigned long end; - u16 out_size; - int err; - - if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) { - dev_err(&pdev->dev, "Unsupported PCI Function type\n"); - return -EINVAL; - } - - pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev); - - cmd_io_status.func_idx = HINIC_HWIF_FUNC_IDX(hwif); - - end = jiffies + msecs_to_jiffies(IO_STATUS_TIMEOUT); - do { - err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM, - HINIC_COMM_CMD_IO_STATUS_GET, - &cmd_io_status, sizeof(cmd_io_status), - &cmd_io_status, &out_size, - HINIC_MGMT_MSG_SYNC); - if ((err) || (out_size != sizeof(cmd_io_status))) { - dev_err(&pdev->dev, "Failed to get IO status, ret = %d\n", - err); - return err; - } - - if (cmd_io_status.status == IO_STOPPED) { - dev_info(&pdev->dev, "IO stopped\n"); - return 0; - } - - msleep(20); - } while (time_before(jiffies, end)); - - dev_err(&pdev->dev, "Wait for IO stopped - Timeout\n"); - return -ETIMEDOUT; -} - /** * clear_io_resource - set the IO resources as not active in the NIC * @hwdev: the NIC HW device @@ -435,11 +392,8 @@ static int clear_io_resources(struct hinic_hwdev *hwdev) return -EINVAL; } - err = 
wait_for_io_stopped(hwdev); - if (err) { - dev_err(&pdev->dev, "IO has not stopped yet\n"); - return err; - } + /* sleep 100ms to wait for firmware stopping I/O */ + msleep(100); cmd_clear_io_res.func_idx = HINIC_HWIF_FUNC_IDX(hwif); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 0f5563f3b77988c69d91205ec31396d6326fec78..a011fd2d262702fa539a1ecf75236c1aa461551b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -104,8 +104,8 @@ struct hinic_cmd_hw_ioctxt { u8 rsvd2; u8 rsvd3; + u8 ppf_idx; u8 rsvd4; - u8 rsvd5; u16 rq_depth; u16 rx_buf_sz_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 5b4760c0e9f531301db219de567198b01b81c995..f683ccbdfca021632455a557e7fccf3827f7460f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -146,6 +146,7 @@ #define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx) #define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx) #define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx) +#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx) #define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type) #define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 278dc13f3dae8ccda357cf866adb01ada0d11786..9fcf2e5e000395100a7977f02dfb3b66c02307eb 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -52,7 +52,7 @@ #define MSG_NOT_RESP 0xFFFF -#define MGMT_MSG_TIMEOUT 1000 +#define MGMT_MSG_TIMEOUT 5000 #define mgmt_to_pfhwdev(pf_mgmt) \ container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt) @@ -276,7 +276,8 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt, goto unlock_sync_msg; } - if (!wait_for_completion_timeout(recv_done, MGMT_MSG_TIMEOUT)) { + if (!wait_for_completion_timeout(recv_done, + msecs_to_jiffies(MGMT_MSG_TIMEOUT))) { dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id); err = -ETIMEDOUT; goto unlock_sync_msg; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index a0c64b30f81a7a5b7b400304e1e8844f34ac32a9..a115e51dc2115543bbbd4baec899c5ead8bc8ef1 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2759,11 +2759,10 @@ static int mvneta_poll(struct napi_struct *napi, int budget) /* For the case where the last mvneta_poll did not process all * RX packets */ - rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); - cause_rx_tx |= pp->neta_armada3700 ? 
pp->cause_rx_tx : port->cause_rx_tx; + rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); if (rx_queue) { rx_queue = rx_queue - 1; if (pp->bm_priv) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72bccf32a2360ff7093c7a5a616eb03..3c04f3d5de2dc4cacde08fdd2cd8d872dd7087f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -225,7 +225,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); if (err) - return err; + goto err_ctx_prepare; j = 0; for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -257,6 +257,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, return 0; err_entry_append: err_entry_get: +err_ctx_prepare: rtnl_unlock(); devlink_dpipe_entry_clear(&entry); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 8aace9a06a5d8e7e396bc5504619dacc556278f2..ea47047265053f0dea0d7e6f3cc8713253b12980 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -112,9 +112,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, u8 prio = tcf_vlan_push_prio(a); u16 vid = tcf_vlan_push_vid(a); - return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, - action, vid, - proto, prio); + err = mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, + action, vid, + proto, prio); + if (err) + return err; } else { dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index adbe0a6fe0db912dc12831a74f7bd824ad4ff87f..1b0e1fc7825f9138d6792ad93122e9cad7688904 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -475,21 +475,47 @@ static int msg_enable; */ /** - * ks_rdreg8 - read 8 bit register from device + * ks_check_endian - Check whether endianness of the bus is correct * @ks : The chip information - * @offset: The register address * - * Read a 8bit register from the chip, returning the result + * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit + * bus. To maintain optimum performance, the bus endianness should be set + * such that it matches the endianness of the CPU. */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) + +static int ks_check_endian(struct ks_net *ks) { - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); + u16 cider; + + /* + * Read CIDER register first, however read it the "wrong" way around. + * If the endian strap on the KS8851-16MLL is incorrect and the chip + * is operating in a different endianness than the CPU, then the meaning + * of BE[3:0] byte-enable bits is also swapped such that: + * BE[3,2,1,0] becomes BE[1,0,3,2] + * + * Luckily for us, the byte-enable bits are the top four MSbits of + * the address register and the CIDER register is at offset 0xc0. + * Hence, by reading address 0xc0c0, which is not impacted by endian + * swapping, we assert either BE[3:2] or BE[1:0] while reading the + * CIDER register.
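+ * One of two outcomes follows: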
+ * + * If the bus configuration is correct, reading 0xc0c0 asserts + * BE[3:2] and this read returns 0x0000, because reading a register + * whose bottom two address bits are 0 requires BE[1:0] to be + * asserted. + * + * If the bus configuration is NOT correct, reading 0xc0c0 asserts + * BE[1:0] and this read returns the non-zero value 0x8872. + */ + iowrite16(BE3 | BE2 | KS_CIDER, ks->hw_addr_cmd); + cider = ioread16(ks->hw_addr); + if (!cider) + return 0; + + netdev_err(ks->netdev, "incorrect EESK endian strap setting\n"); + + return -EINVAL; } /** @@ -507,22 +533,6 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) return ioread16(ks->hw_addr); } -/** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - /** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information @@ -642,8 +652,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -747,7 +756,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. read prepend data */ /** @@ -764,7 +773,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -866,14 +875,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status; + spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks); status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; } @@ -899,6 +911,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; } @@ -968,6 +981,7 @@ static int ks_net_stop(struct net_device *netdev) /* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks); /* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -997,13 +1011,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5.
Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. wait until TXQCR_METFE is auto-cleared */ @@ -1024,10 +1038,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; - disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags); /* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -1041,9 +1054,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; } @@ -1572,6 +1583,10 @@ static int ks8851_probe(struct platform_device *pdev) goto err_free; } + err = ks_check_endian(ks); + if (err) + goto err_free; + netdev->irq = platform_get_irq(pdev, 0); if ((int)netdev->irq < 0) { diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h index cfa970417f818036bc83d6ed22deeb47cea28e90..fe4a4315d20d465416df53773e0475ccbab9ed18 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h @@ -2065,7 +2065,7 @@ vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask); if ((level >= VXGE_ERR && VXGE_COMPONENT_LL & VXGE_DEBUG_ERR_MASK) || \ (level >= VXGE_TRACE && VXGE_COMPONENT_LL & VXGE_DEBUG_TRACE_MASK))\ if ((mask & VXGE_DEBUG_MASK) == mask) \ - printk(fmt "\n", __VA_ARGS__); \ + printk(fmt "\n", ##__VA_ARGS__); \ } while (0) #else #define vxge_debug_ll(level, mask, fmt, ...) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h index 3a79d93b8445308663b5b02f06e172762faba708..5b535aa10d23e263c324cd961be90e3fad0f26b7 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h @@ -454,49 +454,49 @@ int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override); #if (VXGE_DEBUG_LL_CONFIG & VXGE_DEBUG_MASK) #define vxge_debug_ll_config(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, ##__VA_ARGS__) #else #define vxge_debug_ll_config(level, fmt, ...) #endif #if (VXGE_DEBUG_INIT & VXGE_DEBUG_MASK) #define vxge_debug_init(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, ##__VA_ARGS__) #else #define vxge_debug_init(level, fmt, ...) #endif #if (VXGE_DEBUG_TX & VXGE_DEBUG_MASK) #define vxge_debug_tx(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, ##__VA_ARGS__) #else #define vxge_debug_tx(level, fmt, ...) #endif #if (VXGE_DEBUG_RX & VXGE_DEBUG_MASK) #define vxge_debug_rx(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, ##__VA_ARGS__) #else #define vxge_debug_rx(level, fmt, ...) #endif #if (VXGE_DEBUG_MEM & VXGE_DEBUG_MASK) #define vxge_debug_mem(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, ##__VA_ARGS__) #else #define vxge_debug_mem(level, fmt, ...) 
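/* (Illustrative aside, not part of the original patch: the '##' pasted
 * before __VA_ARGS__ in these macros is the GNU C extension that drops
 * the preceding comma when no variadic arguments are supplied, so a call
 * like vxge_debug_mem(VXGE_ERR, "init failed") no longer expands to the
 * ill-formed printk(fmt "\n", ).)
 */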
#endif #if (VXGE_DEBUG_ENTRYEXIT & VXGE_DEBUG_MASK) #define vxge_debug_entryexit(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, ##__VA_ARGS__) #else #define vxge_debug_entryexit(level, fmt, ...) #endif #if (VXGE_DEBUG_INTR & VXGE_DEBUG_MASK) #define vxge_debug_intr(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, ##__VA_ARGS__) #else #define vxge_debug_intr(level, fmt, ...) #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index c4e8bf0773fe980c5e0e10ebdb0507dd6a697e87..6024b832b4d95693f02e6aad6117503a9ace33db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3151,26 +3151,20 @@ static void qed_chain_free_single(struct qed_dev *cdev, static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) { - void **pp_virt_addr_tbl = p_chain->pbl.pp_virt_addr_tbl; + struct addr_tbl_entry *pp_addr_tbl = p_chain->pbl.pp_addr_tbl; u32 page_cnt = p_chain->page_cnt, i, pbl_size; - u8 *p_pbl_virt = p_chain->pbl_sp.p_virt_table; - if (!pp_virt_addr_tbl) + if (!pp_addr_tbl) return; - if (!p_pbl_virt) - goto out; - for (i = 0; i < page_cnt; i++) { - if (!pp_virt_addr_tbl[i]) + if (!pp_addr_tbl[i].virt_addr || !pp_addr_tbl[i].dma_map) break; dma_free_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, - pp_virt_addr_tbl[i], - *(dma_addr_t *)p_pbl_virt); - - p_pbl_virt += QED_CHAIN_PBL_ENTRY_SIZE; + pp_addr_tbl[i].virt_addr, + pp_addr_tbl[i].dma_map); } pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE; @@ -3180,9 +3174,9 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) pbl_size, p_chain->pbl_sp.p_virt_table, p_chain->pbl_sp.p_phys_table); -out: - vfree(p_chain->pbl.pp_virt_addr_tbl); - p_chain->pbl.pp_virt_addr_tbl = NULL; + + vfree(p_chain->pbl.pp_addr_tbl); + p_chain->pbl.pp_addr_tbl = NULL; } void qed_chain_free(struct qed_dev *cdev, struct qed_chain *p_chain) @@ -3283,19 +3277,19 @@ qed_chain_alloc_pbl(struct qed_dev *cdev, { u32 page_cnt = p_chain->page_cnt, size, i; dma_addr_t p_phys = 0, p_pbl_phys = 0; - void **pp_virt_addr_tbl = NULL; + struct addr_tbl_entry *pp_addr_tbl; u8 *p_pbl_virt = NULL; void *p_virt = NULL; - size = page_cnt * sizeof(*pp_virt_addr_tbl); - pp_virt_addr_tbl = vzalloc(size); - if (!pp_virt_addr_tbl) + size = page_cnt * sizeof(*pp_addr_tbl); + pp_addr_tbl = vzalloc(size); + if (!pp_addr_tbl) return -ENOMEM; /* The allocation of the PBL table is done with its full size, since it * is expected to be successive. * qed_chain_init_pbl_mem() is called even in a case of an allocation - * failure, since pp_virt_addr_tbl was previously allocated, and it + * failure, since tbl was previously allocated, and it * should be saved to allow its freeing during the error flow. 
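 * (Illustrative aside, not part of the original patch: each
 * addr_tbl_entry now caches the page's DMA address next to its virtual
 * address, so qed_chain_free_pbl() earlier in this patch can free the
 * pages straight from the table instead of reading the DMA addresses
 * back out of the PBL memory.)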
*/ size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE; @@ -3309,8 +3303,7 @@ qed_chain_alloc_pbl(struct qed_dev *cdev, p_chain->b_external_pbl = true; } - qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys, - pp_virt_addr_tbl); + qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys, pp_addr_tbl); if (!p_pbl_virt) return -ENOMEM; @@ -3329,7 +3322,8 @@ qed_chain_alloc_pbl(struct qed_dev *cdev, /* Fill the PBL table with the physical address of the page */ *(dma_addr_t *)p_pbl_virt = p_phys; /* Keep the virtual address of the page */ - p_chain->pbl.pp_virt_addr_tbl[i] = p_virt; + p_chain->pbl.pp_addr_tbl[i].virt_addr = p_virt; + p_chain->pbl.pp_addr_tbl[i].dma_map = p_phys; p_pbl_virt += QED_CHAIN_PBL_ENTRY_SIZE; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index adb700512baa90ee252b0ca6a28c707ae527375f..a80531b5aeccdd9bf786a46365f8b4db866af289 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -156,6 +156,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; }; struct qede_ptp; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 1900bf7e67d1297dc9b24648e99d0ec50f779305..cd12fb919ad574ba5f46207d0315518f652631f7 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -57,6 +57,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -81,8 +84,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -287,15 +305,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 07f9067affc65ea4d73543c18016174217c1971a..cda5b0a9e9489b71c19871c3193332a88127771b 100644 --- 
a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -1720,7 +1720,7 @@ static int qlcnic_83xx_get_reset_instruction_template(struct qlcnic_adapter *p_d ahw->reset.seq_error = 0; ahw->reset.buff = kzalloc(QLC_83XX_RESTART_TEMPLATE_SIZE, GFP_KERNEL); - if (p_dev->ahw->reset.buff == NULL) + if (ahw->reset.buff == NULL) return -ENOMEM; p_buff = p_dev->ahw->reset.buff; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 390323cadb9228a62c39549fe165721e5a9b6472..99d66239907dd7adf87df9b02d531344a1cc1a62 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -183,6 +183,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, data_format = RMNET_INGRESS_FORMAT_IP_ROUTE | RMNET_EGRESS_FORMAT_IP_ROUTE; + if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 89831adb8eb75c51e00f7e919880601d8e57fa55..6d27eec85fcef920f07c3c280d0df6e6d088e4d7 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2284,7 +2284,7 @@ static int __init sxgbe_cmdline_opt(char *str) if (!str || !*str) return -EINVAL; while ((opt = strsep(&str, ",")) != NULL) { - if (!strncmp(opt, "eee_timer:", 6)) { + if (!strncmp(opt, "eee_timer:", 10)) { if (kstrtoint(opt + 10, 0, &eee_timer)) goto err; } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b9cb697b281847a83aa511c577a6c790516f8012..e0d4c1e850cfe42cf8c84c9cf49b43c816009b1f 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -505,6 +505,7 @@ efx_copy_channel(const struct efx_channel *old_channel) if (tx_queue->channel) tx_queue->channel = channel; tx_queue->buffer = NULL; + tx_queue->cb_page = NULL; memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 712b5eb3507ab3c46297e932a2b846e1a8012527..4156cf007b532411d745ec33b2c50591678f5639 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1362,7 +1362,7 @@ static int rk_gmac_probe(struct platform_device *pdev) ret = rk_gmac_clk_init(plat_dat); if (ret) - return ret; + goto err_remove_config_dt; ret = rk_gmac_powerup(plat_dat->bsp_priv); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 5b3b06a0a3bf53e1eac9572ae8d14add0c3835e7..33407df6bea693b44903734f939b0bd83e1e12f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -274,16 +274,19 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) phymode == PHY_INTERFACE_MODE_MII || phymode == PHY_INTERFACE_MODE_GMII || phymode == PHY_INTERFACE_MODE_SGMII) { - ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); regmap_read(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, &module); module |= (SYSMGR_FPGAGRP_MODULE_EMAC << (reg_shift / 2)); regmap_write(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, module); - } else { - ctrl &= 
~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2)); } + if (dwmac->f2h_ptp_ref_clk) + ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); + else + ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << + (reg_shift / 2)); + regmap_write(sys_mgr_base_addr, reg_offset, ctrl); /* Deassert reset for the phy configuration to be sampled by diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index fc1fa0f9f338713da5c6fcfdd19062ced459a91f..57694eada9955f774e72360d4782713dff2013a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -155,6 +155,8 @@ static int sun7i_gmac_probe(struct platform_device *pdev) plat_dat->init = sun7i_gmac_init; plat_dat->exit = sun7i_gmac_exit; plat_dat->fix_mac_speed = sun7i_fix_speed; + plat_dat->tx_fifo_size = 4096; + plat_dat->rx_fifo_size = 16384; ret = sun7i_gmac_init(pdev, plat_dat->bsp_priv); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 08dd6a06ac58d72727161c6526b7c2f7b02b3ab6..f76d4a7281af052d7dd140bb2aaf8d7df0e40f8b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -218,7 +218,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, reg++; } - while (reg <= perfect_addr_number) { + while (reg < perfect_addr_number) { writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); writel(0, ioaddr + GMAC_ADDR_LOW(reg)); reg++; diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 71ff6bd4be9f2c440b5106d98e0aec0568476a4e..baf8aab59f82a8fd9cbe28d00cbb8f8ab208f149 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -256,6 +256,7 @@ void ipvlan_process_multicast(struct work_struct *work) } if (dev) dev_put(dev); + cond_resched(); } } @@ -448,19 +449,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb) struct ethhdr *ethh = eth_hdr(skb); int ret = NET_XMIT_DROP; - /* In this mode we dont care about multicast and broadcast traffic */ - if (is_multicast_ether_addr(ethh->h_dest)) { - pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", - ntohs(skb->protocol)); - kfree_skb(skb); - goto out; - } - /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. 
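 * (Illustrative aside, not part of the original patch: the
 * multicast/broadcast drop is moved below the skb_mac_header_was_set()
 * check so that eth_hdr(skb) is only inspected once a MAC header is
 * known to be present.)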
*/ if (skb_mac_header_was_set(skb)) { + /* In this mode we don't care about + * multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited( + "Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 09f6795cce534a6056a6b51864d83e487daf6912..cd32d6623f6a32acf554ff77b797c5ca280305a7 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -236,7 +236,6 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || @@ -248,7 +247,7 @@ static int ipvlan_open(struct net_device *dev) list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); - return dev_uc_add(phy_dev, phy_dev->dev_addr); + return 0; } static int ipvlan_stop(struct net_device *dev) @@ -260,8 +259,6 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - dev_uc_del(phy_dev, phy_dev->dev_addr); - list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9a14a483ebda4b620c6c510def7e4142f9bdbbc5..9c9b73f22c0707647ba7a4ce7f8dc96120536a34 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -21,6 +21,7 @@ #include #include #include +#include <linux/if_arp.h> #include @@ -3169,6 +3170,11 @@ static void macsec_dev_set_rx_mode(struct net_device *dev) dev_uc_sync(real_dev, dev); } +static sci_t dev_to_sci(struct net_device *dev, __be16 port) +{ + return make_sci(dev->dev_addr, port); +} + static int macsec_set_mac_address(struct net_device *dev, void *p) { struct macsec_dev *macsec = macsec_priv(dev); @@ -3284,6 +3290,7 @@ static const struct device_type macsec_type = { static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, + [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 }, [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 }, [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, @@ -3531,19 +3538,22 @@ static int macsec_newlink(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack) { struct macsec_dev *macsec = macsec_priv(dev); - struct net_device *real_dev; struct macsec_context ctx; const struct macsec_ops *ops; - int err; - sci_t sci; u8 icv_len = DEFAULT_ICV_LEN; rx_handler_func_t *rx_handler; + u8 icv_len = DEFAULT_ICV_LEN; + struct net_device *real_dev; + int err, mtu; + sci_t sci; if (!tb[IFLA_LINK]) return -EINVAL; real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; + if (real_dev->type != ARPHRD_ETHER) + return -EINVAL; dev->priv_flags |= IFF_MACSEC; @@ -3551,7 +3561,11 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); - dev->mtu = real_dev->mtu - icv_len - macsec_extra_len(true); + mtu = real_dev->mtu - icv_len - macsec_extra_len(true); + if (mtu < 0) + dev->mtu = 0; + else + dev->mtu = mtu; rx_handler = rtnl_dereference(real_dev->rx_handler); if (rx_handler && rx_handler != macsec_handle_frame) diff --git
a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ab539136d5bff8770b38f2547d92f0e4a9226480..3072fc902eca57c79a8130119ddd77d7e40ee42e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -338,6 +338,8 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); kfree_skb(skb); + + cond_resched(); } } @@ -1671,7 +1673,7 @@ static int macvlan_device_event(struct notifier_block *unused, struct macvlan_dev, list); - if (macvlan_sync_address(vlan->dev, dev->dev_addr)) + if (vlan && macvlan_sync_address(vlan->dev, dev->dev_addr)) return NOTIFY_BAD; break; diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 46fe1ae919a30a9a9b7644b5862f4a5bfa0b56ef..51ce3ea17fb37f0533bb1dae3ad76e5797d24976 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -188,6 +188,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -198,6 +215,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index a44b004086b78a4e1d331b43484161486359061e..cdee06bcd85e3d1272a51b1818cc10c220fc3c37 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -31,6 +31,7 @@ #include #include #include +#include <linux/delay.h> /* Operation Mode Strap Override */ #define MII_KSZPHY_OMSO 0x16 @@ -777,6 +778,12 @@ static int kszphy_resume(struct phy_device *phydev) genphy_resume(phydev); + /* After switching from power-down to normal mode, an internal global + * reset is automatically generated. Wait a minimum of 1 ms before + * read/write access to the PHY registers. + */ + usleep_range(1000, 2000); + ret = kszphy_config_reset(phydev); if (ret) return ret; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0a85141d8e7691088641a26358a815db1ec7f5f3..295721efefbed0000f5b353a2b2cc6e16785c884 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -76,7 +76,7 @@ static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); #ifdef CONFIG_PM -static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) +static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); @@ -88,11 +88,10 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) /* PHY not attached? May suspend if the PHY has not already been * suspended as part of a prior call to phy_disconnect() -> * phy_detach() -> phy_suspend() because the parent netdev might be the - * MDIO bus driver and clock gated at this point. Also may resume if - * PHY is not attached. + * MDIO bus driver and clock gated at this point. */ if (!netdev) - return suspend ?
!phydev->suspended : phydev->suspended; + goto out; /* Don't suspend PHY if the attached netdev parent may wakeup. * The parent may point to a PCI device, as in tg3 driver. @@ -107,7 +106,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend) if (device_may_wakeup(&netdev->dev)) return false; - return true; +out: + return !phydev->suspended; } static int mdio_bus_phy_suspend(struct device *dev) @@ -122,9 +122,11 @@ static int mdio_bus_phy_suspend(struct device *dev) if (phydev->attached_dev && phydev->adjust_link) phy_stop_machine(phydev); - if (!mdio_bus_phy_may_suspend(phydev, true)) + if (!mdio_bus_phy_may_suspend(phydev)) return 0; + phydev->suspended_by_mdio_bus = true; + return phy_suspend(phydev); } @@ -133,9 +135,11 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev, false)) + if (!phydev->suspended_by_mdio_bus) goto no_resume; + phydev->suspended_by_mdio_bus = false; + ret = phy_resume(phydev); if (ret < 0) return ret; diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index ea90db3c77058b6a799245bd5a3ff9f672b5da5e..01334aeac577a6a6f36977a71bd9666f9afea1d0 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, register struct cstate *cs = lcs->next; register unsigned long deltaS, deltaA; register short changes = 0; - int hlen; + int nlen, hlen; unsigned char new_seq[16]; register unsigned char *cp = new_seq; struct iphdr *ip; @@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, return isize; ip = (struct iphdr *) icp; + if (ip->version != 4 || ip->ihl < 5) + return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { @@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, comp->sls_o_tcp++; return isize; } - /* Extract TCP header */ + nlen = ip->ihl * 4; + if (isize < nlen + sizeof(*th)) + return isize; - th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4); - hlen = ip->ihl*4 + th->doff*4; + th = (struct tcphdr *)(icp + nlen); + if (th->doff < sizeof(struct tcphdr) / 4) + return isize; + hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). 
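(Illustrative aside, not part of the original patch: the checks added
 * above bail out unless the packet is IPv4 with ip->ihl >= 5, the buffer
 * actually contains the full TCP header, and th->doff is at least the
 * minimum header length, so slhc_compress() no longer parses truncated
 * or non-IPv4 input.)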
Also uncompressible if diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index b07f367abd9157ee2416af634d8853cb6af9f7d7..d7882b548b79a8e7286d1f6a246fa536f923b8c0 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -867,7 +867,10 @@ static int slip_open(struct tty_struct *tty) tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f1aabf8a16c2f3e9daf8a8ea7323d6175d65ffd6..396a8c6cb9992c48ac9a2c4d23758c21563bba81 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -480,6 +480,9 @@ static const struct team_mode *team_mode_get(const char *kind) struct team_mode_item *mitem; const struct team_mode *mode = NULL; + if (!try_module_get(THIS_MODULE)) + return NULL; + spin_lock(&mode_list_lock); mitem = __find_mode(kind); if (!mitem) { @@ -495,6 +498,7 @@ static const struct team_mode *team_mode_get(const char *kind) } spin_unlock(&mode_list_lock); + module_put(THIS_MODULE); return mode; } @@ -2207,6 +2211,8 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, + [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, + [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 78698b764d9eee18351f27751d5834f7026204f4..e6a3b3a34ad6d2b024ad790287e4a0ba09452393 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1134,6 +1134,13 @@ static void tun_net_init(struct net_device *dev) dev->max_mtu = MAX_MTU - dev->hard_header_len; } +static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) +{ + struct sock *sk = tfile->socket.sk; + + return (tun->dev->flags & IFF_UP) && sock_writeable(sk); +} + /* Character device part */ /* Poll */ @@ -1156,10 +1163,14 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; - if (tun->dev->flags & IFF_UP && - (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk)))) + /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to + * guarantee EPOLLOUT to be raised by either here or + * tun_sock_write_space(). Then process could get notification + * after it writes to a down device and meets -EIO. + */ + if (tun_sock_writeable(tun, tfile) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + tun_sock_writeable(tun, tfile))) mask |= POLLOUT | POLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index db70d4c5778a621293701873977491137b350140..e028e03765a5619835ecef72eb885f79a2cd6f72 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -63,7 +63,6 @@ enum qmi_wwan_flags { enum qmi_wwan_quirks { QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */ - QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. 
endpoints */ }; struct qmimux_hdr { @@ -275,6 +274,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } @@ -853,16 +855,6 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = { .data = QMI_WWAN_QUIRK_DTR, }; -static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { - .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, - .bind = qmi_wwan_bind, - .unbind = qmi_wwan_unbind, - .manage_power = qmi_wwan_manage_power, - .rx_fixup = qmi_wwan_rx_fixup, - .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG, -}; - #define HUAWEI_VENDOR_ID 0x12D1 /* map QMI/wwan function by a fixed interface number */ @@ -883,14 +875,18 @@ static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { #define QMI_GOBI_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 0) -/* Quectel does not use fixed interface numbers on at least some of their - * devices. We need to check the number of endpoints to ensure that we bind to - * the correct interface. +/* Many devices have QMI and DIAG functions which are distinguishable + * from other vendor specific functions by class, subclass and + * protocol all being 0xff. The DIAG function has exactly 2 endpoints + * and is silently rejected when probed. + * + * This makes it possible to match dynamically numbered QMI functions + * as seen on e.g. many Quectel modems. */ -#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \ +#define QMI_MATCH_FF_FF_FF(vend, prod) \ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \ USB_SUBCLASS_VENDOR_SPEC, 0xff), \ - .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr static const struct usb_device_id products[] = { /* 1. CDC ECM like devices match on the control interface */ @@ -996,10 +992,10 @@ static const struct usb_device_id products[] = { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. 
Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ @@ -1143,6 +1139,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ + {QMI_FIXED_INTF(0x1690, 0x7588, 4)}, /* ASKEY WWHC050 */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ @@ -1287,6 +1284,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ @@ -1378,7 +1376,6 @@ static int qmi_wwan_probe(struct usb_interface *intf, { struct usb_device_id *id = (struct usb_device_id *)prod; struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc; - const struct driver_info *info; /* Workaround to enable dynamic IDs. This disables usbnet * blacklisting functionality. Which, if required, can be @@ -1414,12 +1411,8 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)id->driver_info; - - if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { - if (desc->bNumEndpoints == 2) - return -ENODEV; - } + if (desc->bNumEndpoints == 2) + return -ENODEV; return usbnet_probe(intf, id); } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a7f9c1886bd4c42d946aa4b755606af5729e9598..cadf5ded45a9b387219c8f51b77f3eff38ac17b0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2696,6 +2696,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } return data; @@ -4055,7 +4057,10 @@ static void r8153_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); @@ -4170,7 +4175,10 @@ static void r8153b_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e0cea5c05f0e27cbf3fd8f8f99a6a0c947f0d7ab..811fe0bde8a3a472e3aaca98a597b7779ca789f2 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -476,7 +476,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; - if (qdisc_tx_is_default(vrf_dev)) + if (qdisc_tx_is_default(vrf_dev) || + IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); return vrf_ip6_out_redirect(vrf_dev, skb); @@ -692,7 +693,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; - if (qdisc_tx_is_default(vrf_dev)) + 
if (qdisc_tx_is_default(vrf_dev) || + IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); return vrf_ip_out_redirect(vrf_dev, skb); @@ -996,23 +998,24 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { int orig_iif = skb->skb_iif; - bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); - bool is_ndisc = ipv6_ndisc_frame(skb); + bool need_strict; - /* loopback, multicast & non-ND link-local traffic; do not push through - * packet taps again. Reset pkt_type for upper layers to process skb + /* loopback traffic; do not push through packet taps again. + * Reset pkt_type for upper layers to process skb */ - if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { + if (skb->pkt_type == PACKET_LOOPBACK) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; - if (skb->pkt_type == PACKET_LOOPBACK) - skb->pkt_type = PACKET_HOST; + skb->pkt_type = PACKET_HOST; goto out; } - /* if packet is NDISC then keep the ingress interface */ - if (!is_ndisc) { + /* if packet is NDISC or addressed to multicast or link-local + * then keep the ingress interface + */ + need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); + if (!ipv6_ndisc_frame(skb) && !need_strict) { vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 4d97a7b5fe3cabc074a28b20dd9a32afd686ded7..927d62c76a604c3214846e005e1203e82076061f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2454,10 +2454,19 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan, /* Setup stats when device is created */ static int vxlan_init(struct net_device *dev) { + struct vxlan_dev *vxlan = netdev_priv(dev); + int err; + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + err = gro_cells_init(&vxlan->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; } @@ -2717,8 +2726,6 @@ static void vxlan_setup(struct net_device *dev) vxlan->dev = dev; - gro_cells_init(&vxlan->gro_cells, dev); - for (h = 0; h < FDB_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vxlan->fdb_head[h]); } diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 571a1ff8f81f2606ab24036af373546251d7b65e..6a26cef6219350bf7c5c6fc5ec531769c6dce41d 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -240,6 +240,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) ret = -ENOMEM; goto free_riptr; } + if (riptr != (u16)riptr || tiptr != (u16)tiptr) { + dev_err(priv->dev, "MURAM allocation out of addressable range\n"); + ret = -ENOMEM; + goto free_tiptr; + } /* Set RIPTR, TIPTR */ iowrite16be(riptr, &priv->ucc_pram->riptr); diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 6a505c26a3e74416c0461b677871a81e2b543041..a269ed63d90f7fd069b6e055ec209c10e5b6dffa 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -261,7 +261,7 @@ struct port { struct hss_plat_info *plat; buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS]; struct desc *desc_tab; /* coherent */ - u32 desc_tab_phys; + dma_addr_t desc_tab_phys; unsigned int id; unsigned int clock_type, clock_rate, loopback; unsigned int initialized, carrier; @@ -861,7 +861,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_dropped++; return NETDEV_TX_OK; } - memcpy_swab32(mem, (u32 
*)((int)skb->data & ~3), bytes / 4); + memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4); dev_kfree_skb(skb); #endif diff --git a/drivers/net/wimax/i2400m/usb-fw.c b/drivers/net/wimax/i2400m/usb-fw.c index 502c346aa790bf4f79218d816420f9997b69df3e..7d396c81ec3eb31d6aa4d451cce72bac9440a4aa 100644 --- a/drivers/net/wimax/i2400m/usb-fw.c +++ b/drivers/net/wimax/i2400m/usb-fw.c @@ -354,6 +354,7 @@ ssize_t i2400mu_bus_bm_wait_for_ack(struct i2400m *i2400m, usb_autopm_put_interface(i2400mu->usb_iface); d_fnend(8, dev, "(i2400m %p ack %p size %zu) = %ld\n", i2400m, ack, ack_size, (long) result); + usb_put_urb(&notif_urb); return result; error_exceeded: diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 72ad84fde5c1898ae4107e28442a527102ca511d..8e084670c3c239e6dbc74e35e82183e97fd8cd12 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1456,6 +1456,9 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) ath_chanctx_set_channel(sc, ctx, &hw->conf.chandef); } + if (changed & IEEE80211_CONF_CHANGE_POWER) + ath9k_set_txpower(sc, NULL); + mutex_unlock(&sc->mutex); ath9k_ps_restore(sc); diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index f1e3dad576292ef5b8e2f03a01a8c185c927a101..f435bd0f8b5b5efba00f8a7c17c4b18323d3987e 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -1304,8 +1304,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev) } /* Interrupt handler bottom-half */ -static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev) +static void b43legacy_interrupt_tasklet(unsigned long data) { + struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data; u32 reason; u32 dma_reason[ARRAY_SIZE(dev->dma_reason)]; u32 merged_dma_reason = 0; @@ -3775,7 +3776,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev, b43legacy_set_status(wldev, B43legacy_STAT_UNINIT); wldev->bad_frames_preempt = modparam_bad_frames_preempt; tasklet_init(&wldev->isr_tasklet, - (void (*)(unsigned long))b43legacy_interrupt_tasklet, + b43legacy_interrupt_tasklet, (unsigned long)wldev); if (modparam_pio) wldev->__using_pio = true; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 4c28b04ea60536dce86508a9bac3168c51015ab2..d198a8780b96670635c6a74b206b347f4a502028 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -1932,6 +1932,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes) BRCMF_SDIO_FT_NORMAL)) { rd->len = 0; brcmu_pkt_buf_free_skb(pkt); + continue; } bus->sdcnt.rx_readahead_cnt++; if (rd->len != roundup(rd_new.len, 16)) { diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 19c442cb93e4af2b50e72fe1c7b972cd0827c2f6..8fbdd7d4fd0c295406e63af57c2d2f036e109d1f 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -3220,8 +3220,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) } } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv) +static void ipw2100_irq_tasklet(unsigned long data) { + struct ipw2100_priv *priv = (struct ipw2100_priv *)data; struct net_device *dev = priv->net_dev; unsigned long flags; u32 inta, tmp; @@ -6027,7
+6028,7 @@ static void ipw2100_rf_kill(struct work_struct *work) spin_unlock_irqrestore(&priv->low_lock, flags); } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv); +static void ipw2100_irq_tasklet(unsigned long data); static const struct net_device_ops ipw2100_netdev_ops = { .ndo_open = ipw2100_open, @@ -6157,7 +6158,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill); INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event); - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw2100_irq_tasklet, (unsigned long)priv); /* NOTE: We do not start the deferred work for status checks yet */ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index 8da87496cb587f2ed92977649c6c31d12f1f32e8..2d0734ab3f747de9618fdc5009833664b256fec9 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -1966,8 +1966,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv) wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL); } -static void ipw_irq_tasklet(struct ipw_priv *priv) +static void ipw_irq_tasklet(unsigned long data) { + struct ipw_priv *priv = (struct ipw_priv *)data; u32 inta, inta_mask, handled = 0; unsigned long flags; int rc = 0; @@ -10702,7 +10703,7 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv) INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate); #endif /* CONFIG_IPW2200_QOS */ - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw_irq_tasklet, (unsigned long)priv); return ret; diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 329f3a63dadd6e5f8ab2c3991481cd0997b1e265..0fb81151a1327752a221a707e826f5de4eadbd9d 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -1399,8 +1399,9 @@ il3945_dump_nic_error_log(struct il_priv *il) } static void -il3945_irq_tasklet(struct il_priv *il) +il3945_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -3432,7 +3433,7 @@ il3945_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il3945_irq_tasklet, + il3945_irq_tasklet, (unsigned long)il); } diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index de9b6522c43f5dba3855c0b4e94f57dd4b3915c5..665e82effb03df3e12fa9f57bac3780419f204f4 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4363,8 +4363,9 @@ il4965_synchronize_irq(struct il_priv *il) } static void -il4965_irq_tasklet(struct il_priv *il) +il4965_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -6264,7 +6265,7 @@ il4965_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il4965_irq_tasklet, + il4965_irq_tasklet, (unsigned long)il); } diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index 
8d5acda92a9b819eb00e159790f796bbcbe1cf27..6e6b124f0d5e35e21910111c2d36ea860a35877f 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -717,7 +717,7 @@ il_eeprom_init(struct il_priv *il) u32 gp = _il_rd(il, CSR_EEPROM_GP); int sz; int ret; - u16 addr; + int addr; /* allocate eeprom */ sz = il->cfg->eeprom_size; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 8f3032b7174d38602f8fc15371cf8f6c0ce11100..b2e393c4fab58f44ffde4a19a02370b123e3c717 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -326,7 +326,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) } /* PHY_SKU section is mandatory in B0 */ - if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { + if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT && + !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { IWL_ERR(mvm, "Can't parse phy_sku in B0, empty sections\n"); return NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 1232f63278eb64167263e55255cb95b54c27258d..319103f4b432e16cdefbca802a372fe7860dd28b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -739,7 +739,8 @@ static struct thermal_zone_device_ops tzone_ops = { static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) { int i; - char name[] = "iwlwifi"; + char name[16]; + static atomic_t counter = ATOMIC_INIT(0); if (!iwl_mvm_is_tt_in_fw(mvm)) { mvm->tz_device.tzone = NULL; @@ -749,6 +750,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH); + sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF); mvm->tz_device.tzone = thermal_zone_device_register(name, IWL_MAX_DTS_TRIPS, IWL_WRITABLE_TRIPS_MSK, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index dffa697d71e0f5160b0c34b1a53caee6842dde02..8a074a516fb2676d0df224c3a1c8dfdb2776aed7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3023,6 +3023,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); mutex_init(&trans_pcie->mutex); init_waitqueue_head(&trans_pcie->ucode_write_waitq); + + trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!trans_pcie->rba.alloc_wq) { + ret = -ENOMEM; + goto out_free_trans; + } + INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); if (!trans_pcie->tso_hdr_page) { ret = -ENOMEM; @@ -3195,10 +3204,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; } - trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); - #ifdef CONFIG_IWLWIFI_PCIE_RTPM trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3; #else @@ -3211,6 +3216,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, iwl_pcie_free_ict(trans); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); + destroy_workqueue(trans_pcie->rba.alloc_wq); +out_free_trans: iwl_trans_free(trans); return ERR_PTR(ret); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c 
b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index bbb39d6ec2ee30b8a92ec43432730bef27d27330..f37018d72b440e56499fe348e4e3ab93c6420eb7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -1124,6 +1124,9 @@ void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue) iwl_pcie_gen2_txq_unmap(trans, queue); + iwl_pcie_gen2_txq_free_memory(trans, trans_pcie->txq[queue]); + trans_pcie->txq[queue] = NULL; + IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue); } diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c index 1a8d8db80b05405de0a2f1a7160c56cb2fd0fdc3..486ca1ee306edd14045db1b524a05fb204974386 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c @@ -2568,7 +2568,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap, sta->supported_rates[0] = 2; if (sta->tx_supp_rates & WLAN_RATE_2M) sta->supported_rates[1] = 4; - if (sta->tx_supp_rates & WLAN_RATE_5M5) + if (sta->tx_supp_rates & WLAN_RATE_5M5) sta->supported_rates[2] = 11; if (sta->tx_supp_rates & WLAN_RATE_11M) sta->supported_rates[3] = 22; diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 95015d74b1c0e2311912514bd7a0c3d5ea7f3528..5a64674a5c8da621206b45d6a4cae5e8376ab646 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1364,7 +1364,8 @@ static int ezusb_init(struct hermes *hw) int retval; BUG_ON(in_interrupt()); - BUG_ON(!upriv); + if (!upriv) + return -EINVAL; upriv->reply_count = 0; /* Write the MAGIC number on the simulated registers to keep diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 27224dc264133aebd95bff20a2e068e4c5066dba..a8ec5b2c5abb3d07b76ea9107e827e13ee41c45f 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3134,9 +3134,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) param.no_vif = true; if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { - hwname = kasprintf(GFP_KERNEL, "%.*s", - nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), - (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + hwname = kstrndup((char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]), + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + GFP_KERNEL); if (!hwname) return -ENOMEM; param.hwname = hwname; @@ -3175,9 +3175,9 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_RADIO_ID]) { idx = nla_get_u32(info->attrs[HWSIM_ATTR_RADIO_ID]); } else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { - hwname = kasprintf(GFP_KERNEL, "%.*s", - nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), - (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + hwname = kstrndup((char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]), + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + GFP_KERNEL); if (!hwname) return -ENOMEM; } else diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index b5340af9fa5ecfd0a2719c0a7b14de9f7ff68f84..80d20fb6f3480e51bea75380a91bacd13eb9568d 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -897,7 +897,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, u8 *peer, *pos, *end; u8 i, action, basic; u16 cap 
= 0; - int ie_len = 0; + int ies_len = 0; if (len < (sizeof(struct ethhdr) + 3)) return; @@ -919,7 +919,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, pos = buf + sizeof(struct ethhdr) + 4; /* payload 1+ category 1 + action 1 + dialog 1 */ cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; pos += 2; break; @@ -929,7 +929,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, /* payload 1+ category 1 + action 1 + dialog 1 + status code 2*/ pos = buf + sizeof(struct ethhdr) + 6; cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; pos += 2; break; @@ -937,7 +937,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, if (len < (sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN)) return; pos = buf + sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN; - ie_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; break; default: mwifiex_dbg(priv->adapter, ERROR, "Unknown TDLS frame type.\n"); @@ -950,33 +950,33 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->tdls_cap.capab = cpu_to_le16(cap); - for (end = pos + ie_len; pos + 1 < end; pos += 2 + pos[1]) { - if (pos + 2 + pos[1] > end) + for (end = pos + ies_len; pos + 1 < end; pos += 2 + pos[1]) { + u8 ie_len = pos[1]; + + if (pos + 2 + ie_len > end) break; switch (*pos) { case WLAN_EID_SUPP_RATES: - if (pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; - sta_ptr->tdls_cap.rates_len = pos[1]; - for (i = 0; i < pos[1]; i++) + sta_ptr->tdls_cap.rates_len = ie_len; + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[i] = pos[i + 2]; break; case WLAN_EID_EXT_SUPP_RATES: - if (pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; basic = sta_ptr->tdls_cap.rates_len; - if (pos[1] > 32 - basic) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates) - basic) return; - for (i = 0; i < pos[1]; i++) + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[basic + i] = pos[i + 2]; - sta_ptr->tdls_cap.rates_len += pos[1]; + sta_ptr->tdls_cap.rates_len += ie_len; break; case WLAN_EID_HT_CAPABILITY: - if (pos > end - sizeof(struct ieee80211_ht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_cap)) + if (ie_len != sizeof(struct ieee80211_ht_cap)) return; /* copy the ie's value into ht_capb*/ memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos + 2, @@ -984,59 +984,45 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->is_11n_enabled = 1; break; case WLAN_EID_HT_OPERATION: - if (pos > end - - sizeof(struct ieee80211_ht_operation) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_operation)) + if (ie_len != sizeof(struct ieee80211_ht_operation)) return; /* copy the ie's value into ht_oper*/ memcpy(&sta_ptr->tdls_cap.ht_oper, pos + 2, sizeof(struct ieee80211_ht_operation)); break; case WLAN_EID_BSS_COEX_2040: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.coex_2040 = pos[2]; break; case WLAN_EID_EXT_CAPABILITY: - if (pos > end - sizeof(struct ieee_types_header)) - return; - if (pos[1] < sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] > 8) + if (ie_len > 8) return; memcpy((u8 *)&sta_ptr->tdls_cap.extcap, pos, sizeof(struct 
ieee_types_header) + - min_t(u8, pos[1], 8)); + min_t(u8, ie_len, 8)); break; case WLAN_EID_RSN: - if (pos > end - sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] < sizeof(struct ieee_types_header)) - return; - if (pos[1] > IEEE_MAX_IE_SIZE - + if (ie_len > IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header)) return; memcpy((u8 *)&sta_ptr->tdls_cap.rsn_ie, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], IEEE_MAX_IE_SIZE - + min_t(u8, ie_len, IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header))); break; case WLAN_EID_QOS_CAPA: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.qos_info = pos[2]; break; case WLAN_EID_VHT_OPERATION: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_operation) - 2) - return; - if (pos[1] != + if (ie_len != sizeof(struct ieee80211_vht_operation)) return; /* copy the ie's value into vhtoper*/ @@ -1046,10 +1032,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_VHT_CAPABILITY: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_vht_cap)) + if (ie_len != sizeof(struct ieee80211_vht_cap)) return; /* copy the ie's value into vhtcap*/ memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos + 2, @@ -1059,9 +1042,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_AID: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - 4) - return; - if (pos[1] != 2) + if (ie_len != sizeof(u16)) return; sta_ptr->tdls_cap.aid = get_unaligned_le16((pos + 2)); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 457a0f725c8aa598c14ea80b38e6d4a6d4183c48..ab74f3155854059035013bb39857f73b1d8aa56a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1091,13 +1091,15 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) return ret; } -static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_irq_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; _rtl_pci_tx_chk_waitq(hw); } -static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_prepare_bcn_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -1223,10 +1225,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw, /*task */ tasklet_init(&rtlpriv->works.irq_tasklet, - (void (*)(unsigned long))_rtl_pci_irq_tasklet, + _rtl_pci_irq_tasklet, (unsigned long)hw); tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet, - (void (*)(unsigned long))_rtl_pci_prepare_bcn_tasklet, + _rtl_pci_prepare_bcn_tasklet, (unsigned long)hw); INIT_WORK(&rtlpriv->works.lps_change_work, rtl_lps_change_work_callback); diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index ec50027b0d8ba327b5b125249e1fdc142426b7e9..3195bae1685f94786c72d2fa3c73386b55a60c6a 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -192,7 +192,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type) const struct firmware *fw; struct sk_buff *skb; unsigned long len; - u8 max_size, payload_size; + int max_size, payload_size; int rc = 
0; if ((type == NCI_PATCH_TYPE_OTP && !info->otp_patch) || @@ -215,8 +215,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type) while (len) { - payload_size = min_t(unsigned long, (unsigned long) max_size, - len); + payload_size = min_t(unsigned long, max_size, len); skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + payload_size), GFP_KERNEL); diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 4b14740edb672c734239eb4c26282c036009d415..8ba5a6d6329e023d56c553becffdaa398c166149 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -236,6 +236,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) out: gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); + usleep_range(10000, 15000); } static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 60ae382f50da9887ad900a14807835490eb6bee3..06bb226c62ef46e734d5d10887b0dbc38ce8ff7e 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -574,7 +574,7 @@ static void port100_tx_update_payload_len(void *_frame, int len) { struct port100_frame *frame = _frame; - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len); + le16_add_cpu(&frame->datalen, len); } static bool port100_rx_frame_is_valid(void *_frame) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 4587c97735600aa25ca1b05253dab7d576be9cde..66cc42a7dc6e600f168c7c3f254a6f8099fe07c1 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -952,8 +952,10 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, return -EFAULT; } - if (!desc || (desc->out_num + desc->in_num == 0) || - !test_bit(cmd, &cmd_mask)) + if (!desc || + (desc->out_num + desc->in_num == 0) || + cmd > ND_CMD_CALL || + !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* fail write commands (when read-only) */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f543b9932c830ba0ad62af837362fd6f7bad1257..a760c449f4a90566db6025a363a9405a6f62ef80 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -889,8 +889,8 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, void *buffer, size_t buflen, u32 *result) { + union nvme_result res = { 0 }; struct nvme_command c; - union nvme_result res; int ret; memset(&c, 0, sizeof(c)); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9e4d2ecf736d563240aee9ca992359940a7a4709..058d542647dd5d1f6b8ba55b166c3a4cf03f89a4 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -337,8 +337,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, !template->ls_req || !template->fcp_io || !template->ls_abort || !template->fcp_abort || !template->max_hw_queues || !template->max_sgl_segments || - !template->max_dif_sgl_segments || !template->dma_boundary || - !template->module) { + !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; goto out_reghost_failed; } @@ -1763,7 +1762,6 @@ nvme_fc_ctrl_free(struct kref *ref) { struct nvme_fc_ctrl *ctrl = container_of(ref, struct nvme_fc_ctrl, ref); - struct nvme_fc_lport *lport = ctrl->lport; unsigned long flags; if (ctrl->ctrl.tagset) { @@ -1789,7 +1787,6 @@ nvme_fc_ctrl_free(struct kref *ref) if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); - module_put(lport->ops->module); } static void @@ -2768,15 +2765,10 @@ nvme_fc_init_ctrl(struct 
device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } - if (!try_module_get(lport->ops->module)) { - ret = -EUNATCH; - goto out_free_ctrl; - } - idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_mod_put; + goto out_free_ctrl; } ctrl->ctrl.opts = opts; @@ -2923,8 +2915,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, out_free_ida: put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); -out_mod_put: - module_put(lport->ops->module); out_free_ctrl: kfree(ctrl); out_fail: diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index b8fe8702065bc1f88d26f8f292ef321412ab3b99..096523d8dd422b511c17290065dbaa290debd877 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -693,7 +693,6 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_DMABOUND_4G 0xFFFFFFFF static struct nvme_fc_port_template fctemplate = { - .module = THIS_MODULE, .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index fe26697d3bd724d048df132d95ab74797fc0f100..69da2f6896dae5261533ad11dcddfc75a002119d 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -259,6 +259,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) rc = of_mdiobus_register_phy(mdio, child, addr); if (rc && rc != -ENODEV) goto unregister; + break; } } } diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 4bf6a9db6ac0c2622cffcc56270a6a8318da4fe9..55c98f119df226ac1046d53e0566e56b22f237c3 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -887,10 +887,13 @@ static void __init of_unittest_platform_populate(void) of_platform_populate(np, match, NULL, &test_bus->dev); for_each_child_of_node(np, child) { - for_each_child_of_node(child, grandchild) - unittest(of_find_device_by_node(grandchild), + for_each_child_of_node(child, grandchild) { + pdev = of_find_device_by_node(grandchild); + unittest(pdev, "Could not create device for node '%s'\n", grandchild->name); + of_dev_put(pdev); + } } of_platform_depopulate(&test_bus->dev); diff --git a/drivers/pci/endpoint/pci-epc-mem.c b/drivers/pci/endpoint/pci-epc-mem.c index 83b7d5d3fc3e465d37f6eaf361f217279f8f8db7..60fbfe92e0ef8249cc5057f2d9a2bf1d48df58a0 100644 --- a/drivers/pci/endpoint/pci-epc-mem.c +++ b/drivers/pci/endpoint/pci-epc-mem.c @@ -90,6 +90,7 @@ int __pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size, mem->page_size = page_size; mem->pages = pages; mem->size = size; + mutex_init(&mem->lock); epc->mem = mem; @@ -133,7 +134,7 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size) { int pageno; - void __iomem *virt_addr; + void __iomem *virt_addr = NULL; struct pci_epc_mem *mem = epc->mem; unsigned int page_shift = ilog2(mem->page_size); int order; @@ -141,15 +142,18 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, size = ALIGN(size, mem->page_size); order = pci_epc_mem_get_order(mem, size); + mutex_lock(&mem->lock); pageno = bitmap_find_free_region(mem->bitmap, mem->pages, order); if (pageno < 0) - return NULL; + goto ret; *phys_addr = mem->phys_base + (pageno << page_shift); virt_addr = ioremap(*phys_addr, size); if (!virt_addr) bitmap_release_region(mem->bitmap, pageno, order); +ret: + mutex_unlock(&mem->lock); return virt_addr; } 
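/*
 * Annotation, not part of the patch: bitmap_find_free_region() and
 * bitmap_release_region() are plain read-modify-write helpers with no
 * locking of their own, so concurrent pci_epc_mem_alloc_addr() and
 * pci_epc_mem_free_addr() callers could otherwise corrupt mem->bitmap.
 * The hunks above serialize both paths on the new mem->lock mutex
 * (initialized in __pci_epc_mem_init()); the single unlock after the
 * "ret" label also covers the ioremap() failure path, which releases
 * the just-claimed region before returning the NULL virt_addr.
 */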
EXPORT_SYMBOL_GPL(pci_epc_mem_alloc_addr); @@ -175,7 +179,9 @@ void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, pageno = (phys_addr - mem->phys_base) >> page_shift; size = ALIGN(size, mem->page_size); order = pci_epc_mem_get_order(mem, size); + mutex_lock(&mem->lock); bitmap_release_region(mem->bitmap, pageno, order); + mutex_unlock(&mem->lock); } EXPORT_SYMBOL_GPL(pci_epc_mem_free_addr); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 0fd8e164339c3e6352af98eb5240e885c10e5103..0dc646c1bc3db9eb822153411bc5c4f61c029a5e 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -179,6 +179,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) failed2: sysfs_remove_link(&dev->dev.kobj, buf); failed1: + pci_stop_and_remove_bus_device(virtfn); pci_dev_put(dev); pci_stop_and_remove_bus_device(virtfn); failed0: diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 2b95984c6a57b61d65507ef913198da9b07f8e89..9146f01ffeb251496b4a59771674dcacf5b53bc0 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -80,6 +80,7 @@ struct pcie_link_state { u32 clkpm_capable:1; /* Clock PM capable? */ u32 clkpm_enabled:1; /* Current Clock PM state */ u32 clkpm_default:1; /* Default Clock PM state by BIOS */ + u32 clkpm_disable:1; /* Clock PM disabled */ /* Exit latencies */ struct aspm_latency latency_up; /* Upstream direction exit latency */ @@ -177,8 +178,11 @@ static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable) static void pcie_set_clkpm(struct pcie_link_state *link, int enable) { - /* Don't enable Clock PM if the link is not Clock PM capable */ - if (!link->clkpm_capable) + /* + * Don't enable Clock PM if the link is not Clock PM capable + * or Clock PM is disabled + */ + if (!link->clkpm_capable || link->clkpm_disable) enable = 0; /* Need nothing if the specified equals to current state */ if (link->clkpm_enabled == enable) @@ -208,7 +212,8 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) } link->clkpm_enabled = enabled; link->clkpm_default = enabled; - link->clkpm_capable = (blacklist) ? 0 : capable; + link->clkpm_capable = capable; + link->clkpm_disable = blacklist ? 
1 : 0; } static bool pcie_retrain_link(struct pcie_link_state *link) @@ -693,9 +698,9 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) /* Enable what we need to enable */ pci_clear_and_set_dword(parent, up_cap_ptr + PCI_L1SS_CTL1, - PCI_L1SS_CAP_L1_PM_SS, val); + PCI_L1SS_CTL1_L1SS_MASK, val); pci_clear_and_set_dword(child, dw_cap_ptr + PCI_L1SS_CTL1, - PCI_L1SS_CAP_L1_PM_SS, val); + PCI_L1SS_CTL1_L1SS_MASK, val); } static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) @@ -1052,10 +1057,9 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) link->aspm_disable |= ASPM_STATE_L1; pcie_config_aspm_link(link, policy_to_aspm_state(link)); - if (state & PCIE_LINK_STATE_CLKPM) { - link->clkpm_capable = 0; - pcie_set_clkpm(link, 0); - } + if (state & PCIE_LINK_STATE_CLKPM) + link->clkpm_disable = 1; + pcie_set_clkpm(link, policy_to_clkpm_state(link)); mutex_unlock(&aspm_lock); if (sem) up_read(&pci_bus_sem); diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index bf229b442e723745d35d355212bca36a4cafdeab..6ef0d4b756f0994d503565f2be38fdd83be7f266 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -412,7 +412,7 @@ static int mrpc_queue_cmd(struct switchtec_user *stuser) kref_get(&stuser->kref); stuser->read_len = sizeof(stuser->data); stuser_set_state(stuser, MRPC_QUEUED); - init_completion(&stuser->comp); + reinit_completion(&stuser->comp); list_add_tail(&stuser->list, &stdev->mrpc_queue); mrpc_cmd_submit(stdev); diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 3303dd8d8eb5718f96de5326949ba89a60e9f32e..604e549a9a47d2986ddd13793b5c847ff12c574d 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -25,8 +25,6 @@ static int arm_pmu_acpi_register_irq(int cpu) int gsi, trigger; gicc = acpi_cpu_get_madt_gicc(cpu); - if (WARN_ON(!gicc)) - return -EINVAL; gsi = gicc->performance_interrupt; @@ -65,11 +63,10 @@ static void arm_pmu_acpi_unregister_irq(int cpu) int gsi; gicc = acpi_cpu_get_madt_gicc(cpu); - if (!gicc) - return; gsi = gicc->performance_interrupt; - acpi_unregister_gsi(gsi); + if (gsi) + acpi_unregister_gsi(gsi); } static int arm_pmu_acpi_parse_irqs(void) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c55517312485d5ab3f8257e2b7ada1fbe5856778..08ea74177de296d877f1d1fa11a37d30065b9625 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2031,7 +2031,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) return PTR_ERR(pctldev->p); } - kref_get(&pctldev->p->users); pctldev->hog_default = pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); if (IS_ERR(pctldev->hog_default)) { diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 9df5d29d708da14403b386982635c80478fa3592..4fb3e44f91331acab6e70dcd6d5817b796d64742 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -958,7 +958,13 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset) raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + + /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */ + if (value & BYT_DIRECT_IRQ_EN) + /* nothing to do */ ; + else + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + writel(value, reg); raw_spin_unlock_irqrestore(&byt_lock, flags); } diff --git 
a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 36c14b85fc7cde0caf54330423c38d4092bde04a..8db182067ecb0ee94d432f686ede43c51794ad23 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -158,8 +158,8 @@ static const unsigned int sdio_d0_pins[] = { PIN(GPIOX_0, EE_OFF) }; static const unsigned int sdio_d1_pins[] = { PIN(GPIOX_1, EE_OFF) }; static const unsigned int sdio_d2_pins[] = { PIN(GPIOX_2, EE_OFF) }; static const unsigned int sdio_d3_pins[] = { PIN(GPIOX_3, EE_OFF) }; -static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_4, EE_OFF) }; -static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_5, EE_OFF) }; +static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_4, EE_OFF) }; +static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_5, EE_OFF) }; static const unsigned int sdio_irq_pins[] = { PIN(GPIOX_7, EE_OFF) }; static const unsigned int nand_ce0_pins[] = { PIN(BOOT_8, EE_OFF) }; diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c index e1c34e19222ee7bcf12d3a98f348b9d749b312fe..3ddb9565ed8044486308ef1abe631568ffec55bd 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c @@ -500,17 +500,15 @@ enum { SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK, CRX0_MARK, CRX1_MARK, CTX0_MARK, CTX1_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK, PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK, PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK, PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK, IERXD_MARK, IETXD_MARK, - CRX0_CRX1_MARK, WDTOVF_MARK, - CRX0X1_MARK, - /* DMAC */ TEND0_MARK, DACK0_MARK, DREQ0_MARK, TEND1_MARK, DACK1_MARK, DREQ1_MARK, @@ -998,12 +996,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(PJ3_DATA, PJ3MD_00), PINMUX_DATA(CRX1_MARK, PJ3MD_01), - PINMUX_DATA(CRX0X1_MARK, PJ3MD_10), + PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10), PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11), PINMUX_DATA(PJ2_DATA, PJ2MD_000), PINMUX_DATA(CTX1_MARK, PJ2MD_001), - PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010), + PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010), PINMUX_DATA(CS2_MARK, PJ2MD_011), PINMUX_DATA(SCK0_MARK, PJ2MD_100), PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101), @@ -1248,6 +1246,7 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0), GPIO_FN(CRX0_CRX1), diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c index cfdb4fc177c3e2133cf8b59afce5b7fbcc91a28b..3df0c0d139d08f6bd889084373ecd81610f5b618 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c @@ -740,13 +740,12 @@ enum { CRX0_MARK, CTX0_MARK, CRX1_MARK, CTX1_MARK, CRX2_MARK, CTX2_MARK, - CRX0_CRX1_MARK, - CRX0_CRX1_CRX2_MARK, - CTX0CTX1CTX2_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, + CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK, CRX1_PJ22_MARK, CTX1_PJ23_MARK, CRX2_PJ20_MARK, CTX2_PJ21_MARK, - CRX0CRX1_PJ22_MARK, - CRX0CRX1CRX2_PJ20_MARK, + CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK, + CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK, /* VDC */ DV_CLK_MARK, @@ -824,6 +823,7 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CS3_MARK, PC8MD_001), PINMUX_DATA(TXD7_MARK, PC8MD_010), PINMUX_DATA(CTX1_MARK, PC8MD_011), + PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100), PINMUX_DATA(PC7_DATA, PC7MD_000), PINMUX_DATA(CKE_MARK, PC7MD_001), @@ -836,11 +836,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CAS_MARK, PC6MD_001), 
PINMUX_DATA(SCK7_MARK, PC6MD_010), PINMUX_DATA(CTX0_MARK, PC6MD_011), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100), PINMUX_DATA(PC5_DATA, PC5MD_000), PINMUX_DATA(RAS_MARK, PC5MD_001), PINMUX_DATA(CRX0_MARK, PC5MD_011), - PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100), PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101), PINMUX_DATA(PC4_DATA, PC4MD_00), @@ -1292,30 +1293,32 @@ static const u16 pinmux_data[] = { PINMUX_DATA(LCD_DATA23_PJ23_MARK, PJ23MD_010), PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011), PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100), - PINMUX_DATA(CTX1_MARK, PJ23MD_101), + PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101), + PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110), PINMUX_DATA(PJ22_DATA, PJ22MD_000), PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001), PINMUX_DATA(LCD_DATA22_PJ22_MARK, PJ22MD_010), PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011), PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100), - PINMUX_DATA(CRX1_MARK, PJ22MD_101), - PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110), + PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101), + PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110), PINMUX_DATA(PJ21_DATA, PJ21MD_000), PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001), PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010), PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011), PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100), - PINMUX_DATA(CTX2_MARK, PJ21MD_101), + PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101), + PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110), PINMUX_DATA(PJ20_DATA, PJ20MD_000), PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001), PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010), PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011), PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100), - PINMUX_DATA(CRX2_MARK, PJ20MD_101), - PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110), + PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101), + PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110), PINMUX_DATA(PJ19_DATA, PJ19MD_000), PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001), @@ -1666,12 +1669,24 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(WDTOVF), /* CAN */ + GPIO_FN(CTX2), + GPIO_FN(CRX2), GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), GPIO_FN(CRX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0_CRX1), + GPIO_FN(CTX0_CTX1_CTX2), GPIO_FN(CRX0_CRX1_CRX2), + GPIO_FN(CTX2_PJ21), + GPIO_FN(CRX2_PJ20), + GPIO_FN(CTX1_PJ23), + GPIO_FN(CRX1_PJ22), + GPIO_FN(CTX0_CTX1_PJ23), + GPIO_FN(CRX0_CRX1_PJ22), + GPIO_FN(CTX0_CTX1_CTX2_PJ21), + GPIO_FN(CRX0_CRX1_CRX2_PJ20), /* DMAC */ GPIO_FN(TEND0), diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 74997194fd88081a901daf9336f15ca4293b0e22..92205b90c25cbbf1c42d5d846bbc692dac6fa1a3 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -443,6 +443,14 @@ static const struct dmi_system_id critclk_systems[] = { DMI_MATCH(DMI_PRODUCT_NAME, "3I380D"), }, }, + { + /* pmc_plt_clk* - are used for ethernet controllers */ + .ident = "Lex 2I385SW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Lex BayTrail"), + DMI_MATCH(DMI_PRODUCT_NAME, "2I385SW"), + }, + }, { /* pmc_plt_clk* - are used for ethernet controllers */ .ident = "Beckhoff CB3163", diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 51f0961ecf3e015c5ff9752d8999ac1281c48e34..a7d8cadf172cb24704b8a840b5cbf887cb4d2724 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1842,7 +1842,10 @@ int bq27xxx_battery_setup(struct bq27xxx_device_info *di) di->bat = power_supply_register_no_ws(di->dev, psy_desc, &psy_cfg); if (IS_ERR(di->bat)) { - dev_err(di->dev, "failed to register 
battery\n"); + if (PTR_ERR(di->bat) == -EPROBE_DEFER) + dev_dbg(di->dev, "failed to register battery, deferring probe\n"); + else + dev_err(di->dev, "failed to register battery\n"); return PTR_ERR(di->bat); } diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index e702131d8ebd2dbc5131102e81b528d5b6563136..88bfbb203e3be1e379ac379d2d983f432765fc88 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -60,8 +60,7 @@ static const char * const power_supply_charge_type_text[] = { static const char * const power_supply_health_text[] = { "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", "Watchdog timer expire", - "Safety timer expire", - "Warm", "Cool", "Hot" + "Safety timer expire", "Over current", "Warm", "Cool", "Hot" }; static const char * const power_supply_technology_text[] = { diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index db001cba937fd8617fc81720e21c30601f2bfec5..e340ad79a1ec95c060af20691832a346c1b49e26 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -166,6 +166,7 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) pc->chip.dev = &pdev->dev; pc->chip.ops = &bcm2835_pwm_ops; + pc->chip.base = -1; pc->chip.npwm = 2; pc->chip.of_xlate = of_pwm_xlate_with_flags; pc->chip.of_pwm_n_cells = 3; diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 5ad42f33e70c11d8efd0ef4a545f01bf948e3c76..2e15acf13893d98cde13b1bc972a9ef4e33ca370 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -337,6 +337,11 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) static int pwm_omap_dmtimer_remove(struct platform_device *pdev) { struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&omap->chip); + if (ret) + return ret; if (pm_runtime_active(&omap->dm_timer_pdev->dev)) omap->pdata->stop(omap->dm_timer); @@ -345,7 +350,7 @@ static int pwm_omap_dmtimer_remove(struct platform_device *pdev) mutex_destroy(&omap->mutex); - return pwmchip_remove(&omap->chip); + return 0; } static const struct of_device_id pwm_omap_dmtimer_of_match[] = { diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 567f5e2771c47288488063f49363fd3e0b4eaab7..259fd58812aed13f293c52105ec415a6f48ca12c 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * Because the PCA9685 has only one prescaler per chip, changing the period of @@ -85,6 +86,7 @@ struct pca9685 { #if IS_ENABLED(CONFIG_GPIOLIB) struct mutex lock; struct gpio_chip gpio; + DECLARE_BITMAP(pwms_inuse, PCA9685_MAXCHAN + 1); #endif }; @@ -94,51 +96,51 @@ static inline struct pca9685 *to_pca(struct pwm_chip *chip) } #if IS_ENABLED(CONFIG_GPIOLIB) -static int pca9685_pwm_gpio_request(struct gpio_chip *gpio, unsigned int offset) +static bool pca9685_pwm_test_and_set_inuse(struct pca9685 *pca, int pwm_idx) { - struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; + bool is_inuse; mutex_lock(&pca->lock); - - pwm = &pca->chip.pwms[offset]; - - if (pwm->flags & (PWMF_REQUESTED | PWMF_EXPORTED)) { - mutex_unlock(&pca->lock); - return -EBUSY; + if (pwm_idx >= PCA9685_MAXCHAN) { + /* + * "all LEDs" channel: + * pretend already in use if any of the PWMs are requested + */ + if (!bitmap_empty(pca->pwms_inuse, PCA9685_MAXCHAN)) { + is_inuse = true; + goto out; + } + } else { + 
/* + * regular channel: + * pretend already in use if the "all LEDs" channel is requested + */ + if (test_bit(PCA9685_MAXCHAN, pca->pwms_inuse)) { + is_inuse = true; + goto out; + } } - - pwm_set_chip_data(pwm, (void *)1); - + is_inuse = test_and_set_bit(pwm_idx, pca->pwms_inuse); +out: mutex_unlock(&pca->lock); - pm_runtime_get_sync(pca->chip.dev); - return 0; + return is_inuse; } -static bool pca9685_pwm_is_gpio(struct pca9685 *pca, struct pwm_device *pwm) +static void pca9685_pwm_clear_inuse(struct pca9685 *pca, int pwm_idx) { - bool is_gpio = false; - mutex_lock(&pca->lock); + clear_bit(pwm_idx, pca->pwms_inuse); + mutex_unlock(&pca->lock); +} - if (pwm->hwpwm >= PCA9685_MAXCHAN) { - unsigned int i; - - /* - * Check if any of the GPIOs are requested and in that case - * prevent using the "all LEDs" channel. - */ - for (i = 0; i < pca->gpio.ngpio; i++) - if (gpiochip_is_requested(&pca->gpio, i)) { - is_gpio = true; - break; - } - } else if (pwm_get_chip_data(pwm)) { - is_gpio = true; - } +static int pca9685_pwm_gpio_request(struct gpio_chip *gpio, unsigned int offset) +{ + struct pca9685 *pca = gpiochip_get_data(gpio); - mutex_unlock(&pca->lock); - return is_gpio; + if (pca9685_pwm_test_and_set_inuse(pca, offset)) + return -EBUSY; + pm_runtime_get_sync(pca->chip.dev); + return 0; } static int pca9685_pwm_gpio_get(struct gpio_chip *gpio, unsigned int offset) @@ -170,13 +172,10 @@ static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset, static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) { struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; pca9685_pwm_gpio_set(gpio, offset, 0); pm_runtime_put(pca->chip.dev); - mutex_lock(&pca->lock); - pwm = &pca->chip.pwms[offset]; - mutex_unlock(&pca->lock); + pca9685_pwm_clear_inuse(pca, offset); } static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, @@ -228,12 +227,17 @@ static int pca9685_pwm_gpio_probe(struct pca9685 *pca) return devm_gpiochip_add_data(dev, &pca->gpio, pca); } #else -static inline bool pca9685_pwm_is_gpio(struct pca9685 *pca, - struct pwm_device *pwm) +static inline bool pca9685_pwm_test_and_set_inuse(struct pca9685 *pca, + int pwm_idx) { return false; } +static inline void +pca9685_pwm_clear_inuse(struct pca9685 *pca, int pwm_idx) +{ +} + static inline int pca9685_pwm_gpio_probe(struct pca9685 *pca) { return 0; @@ -417,7 +421,7 @@ static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) { struct pca9685 *pca = to_pca(chip); - if (pca9685_pwm_is_gpio(pca, pwm)) + if (pca9685_pwm_test_and_set_inuse(pca, pwm->hwpwm)) return -EBUSY; pm_runtime_get_sync(chip->dev); @@ -426,8 +430,11 @@ static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) static void pca9685_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { + struct pca9685 *pca = to_pca(chip); + pca9685_pwm_disable(chip, pwm); pm_runtime_put(chip->dev); + pca9685_pwm_clear_inuse(pca, pwm->hwpwm); } static const struct pwm_ops pca9685_pwm_ops = { diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 0fcf94ffad321d8bb10911c6cdb65eebd8c8161b..c298bec25a90a30670845bc17cb0316e7112f99d 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -236,24 +236,28 @@ static int rcar_pwm_probe(struct platform_device *pdev) rcar_pwm->chip.base = -1; rcar_pwm->chip.npwm = 1; + pm_runtime_enable(&pdev->dev); + ret = pwmchip_add(&rcar_pwm->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to register PWM chip: %d\n", ret); + 
pm_runtime_disable(&pdev->dev); return ret; } - pm_runtime_enable(&pdev->dev); - return 0; } static int rcar_pwm_remove(struct platform_device *pdev) { struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&rcar_pwm->chip); pm_runtime_disable(&pdev->dev); - return pwmchip_remove(&rcar_pwm->chip); + return ret; } static const struct of_device_id rcar_pwm_of_table[] = { diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index 29267d12fb4c9d3cf9283c38b114b00430704b09..9c7962f2f0aa4c058f497c58e9107e02e18168cf 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -423,16 +423,17 @@ static int tpu_probe(struct platform_device *pdev) tpu->chip.base = -1; tpu->chip.npwm = TPU_CHANNEL_MAX; + pm_runtime_enable(&pdev->dev); + ret = pwmchip_add(&tpu->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to register PWM chip\n"); + pm_runtime_disable(&pdev->dev); return ret; } dev_info(&pdev->dev, "TPU PWM %d registered\n", tpu->pdev->id); - pm_runtime_enable(&pdev->dev); - return 0; } @@ -442,12 +443,10 @@ static int tpu_remove(struct platform_device *pdev) int ret; ret = pwmchip_remove(&tpu->chip); - if (ret) - return ret; pm_runtime_disable(&pdev->dev); - return 0; + return ret; } #ifdef CONFIG_OF diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 213b68743cc89bd00571a12e8870a36434dd43cc..92498ac503035c54c3742929fd2978833226fb5d 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -714,7 +714,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev, } if (!pdata->dvs_gpio[i]) { - dev_warn(dev, "there is no dvs%d gpio\n", i); + dev_info(dev, "there is no dvs%d gpio\n", i); continue; } diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index eab14b414bf0dda6e7461b7b05246f4842844090..8f4fa1a52f0578c05f6bd3046a90982d65e20453 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -288,7 +288,7 @@ void rproc_free_vring(struct rproc_vring *rvring) { int size = PAGE_ALIGN(vring_size(rvring->len, rvring->align)); struct rproc *rproc = rvring->rvdev->rproc; - int idx = rvring->rvdev->vring - rvring; + int idx = rvring - rvring->rvdev->vring; struct fw_rsc_vdev *rsc; dma_free_coherent(rproc->dev.parent, size, rvring->va, rvring->dma); @@ -1620,7 +1620,7 @@ static int __init remoteproc_init(void) return 0; } -module_init(remoteproc_init); +subsys_initcall(remoteproc_init); static void __exit remoteproc_exit(void) { diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c index 600c3619051cf16c3e675b983bdb5b48c50470c2..71669b6cca6366b79b81136fd7d35c2f78fab0ea 100644 --- a/drivers/rpmsg/qcom_glink_smem.c +++ b/drivers/rpmsg/qcom_glink_smem.c @@ -231,6 +231,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent, ret = device_register(dev); if (ret) { pr_err("failed to register glink edge\n"); + put_device(dev); kfree(dev); return ERR_PTR(ret); } diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 623fdec0f9c3385491a5751e8d9d5ae6c635ca6d..acae8d23731f905f13f63a51453c343536fe9a13 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -323,6 +323,7 @@ config RTC_DRV_MAX6900 config RTC_DRV_MAX8907 tristate "Maxim MAX8907" depends on MFD_MAX8907 || COMPILE_TEST + select REGMAP_IRQ help If you say yes here you will get support for the RTC of Maxim MAX8907 PMIC. 
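Annotation, not part of the patch: the pwm-rcar and pwm-renesas-tpu hunks earlier in this series apply the same ordering rule. Runtime PM must be enabled before pwmchip_add() makes the chip visible to consumers (whose first request may immediately exercise the hardware), unwound again on the registration failure path, and torn down in remove only after pwmchip_remove() has unregistered the chip. A minimal sketch of that pattern, using hypothetical foo_* names and a stub .apply callback:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/pwm.h>
#include <linux/slab.h>

struct foo_pwm_chip {
	struct pwm_chip chip;
};

static int foo_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
			 struct pwm_state *state)
{
	/* Stub: a real driver would program the hardware here. */
	return 0;
}

static const struct pwm_ops foo_pwm_ops = {
	.apply = foo_pwm_apply,
	.owner = THIS_MODULE,
};

static int foo_pwm_probe(struct platform_device *pdev)
{
	struct foo_pwm_chip *foo;
	int ret;

	foo = devm_kzalloc(&pdev->dev, sizeof(*foo), GFP_KERNEL);
	if (!foo)
		return -ENOMEM;

	platform_set_drvdata(pdev, foo);
	foo->chip.dev = &pdev->dev;
	foo->chip.ops = &foo_pwm_ops;
	foo->chip.base = -1;
	foo->chip.npwm = 1;

	/* Enable runtime PM before consumers can see the chip... */
	pm_runtime_enable(&pdev->dev);

	ret = pwmchip_add(&foo->chip);
	if (ret < 0) {
		/* ...and unwind it on every later failure path. */
		pm_runtime_disable(&pdev->dev);
		return ret;
	}

	return 0;
}

static int foo_pwm_remove(struct platform_device *pdev)
{
	struct foo_pwm_chip *foo = platform_get_drvdata(pdev);
	int ret;

	/* Unregister first so no consumer races with pm_runtime_disable(). */
	ret = pwmchip_remove(&foo->chip);
	pm_runtime_disable(&pdev->dev);
	return ret;
}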
diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 7d3e5168fcefcdb1ad637e9091cb16e3e2893e20..efbbde7379f13e9130c90310321508e73bb39056 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -341,6 +341,10 @@ static int pm860x_rtc_probe(struct platform_device *pdev) info->dev = &pdev->dev; dev_set_drvdata(&pdev->dev, info); + info->rtc_dev = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(info->rtc_dev)) + return PTR_ERR(info->rtc_dev); + ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, rtc_update_handler, IRQF_ONESHOT, "rtc", info); @@ -382,13 +386,11 @@ static int pm860x_rtc_probe(struct platform_device *pdev) } } - info->rtc_dev = devm_rtc_device_register(&pdev->dev, "88pm860x-rtc", - &pm860x_rtc_ops, THIS_MODULE); - ret = PTR_ERR(info->rtc_dev); - if (IS_ERR(info->rtc_dev)) { - dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); + info->rtc_dev->ops = &pm860x_rtc_ops; + + ret = rtc_register_device(info->rtc_dev); + if (ret) return ret; - } /* * enable internal XO instead of internal 3.25MHz clock since it can diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index ae6506a8b4f5c8725536cbd5f69d4db934fd0d2e..b25a2ba5ac483843568ff084daffc992afcb356d 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -559,9 +559,7 @@ static const struct pinctrl_ops rtc_pinctrl_ops = { .dt_free_map = pinconf_generic_dt_free_map, }; -enum rtc_pin_config_param { - PIN_CONFIG_ACTIVE_HIGH = PIN_CONFIG_END + 1, -}; +#define PIN_CONFIG_ACTIVE_HIGH (PIN_CONFIG_END + 1) static const struct pinconf_generic_params rtc_params[] = { {"ti,active-high", PIN_CONFIG_ACTIVE_HIGH, 0}, diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 2a3f874a21d548ef6a04bca6e3a8955f3668d291..9cebff8e8d74001f38f0148cf9118a163f2a1c37 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -303,8 +303,10 @@ static void * cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) { struct ccwdev_iter *iter; + loff_t p = *offset; - if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + (*offset)++; + if (p >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) return NULL; iter = it; if (iter->devno == __MAX_SUBCHANNEL) { @@ -314,7 +316,6 @@ cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) return NULL; } else iter->devno++; - (*offset)++; return iter; } diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e5c32f4b5287ebc4da569d403475d5624bac43ef..d2203cd1781382f234886bae21ce146b877d293d 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -828,8 +828,10 @@ static void io_subchannel_register(struct ccw_device *cdev) * Now we know this subchannel will stay, we can throw * our delayed uevent. */ - dev_set_uevent_suppress(&sch->dev, 0); - kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + if (dev_get_uevent_suppress(&sch->dev)) { + dev_set_uevent_suppress(&sch->dev, 0); + kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + } /* make it known to the system */ ret = ccw_device_add(cdev); if (ret) { @@ -1037,8 +1039,11 @@ static int io_subchannel_probe(struct subchannel *sch) * Throw the delayed uevent for the subchannel, register * the ccw_device and exit. 
*/ - dev_set_uevent_suppress(&sch->dev, 0); - kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + if (dev_get_uevent_suppress(&sch->dev)) { + /* should always be the case for the console */ + dev_set_uevent_suppress(&sch->dev, 0); + kobject_uevent(&sch->dev.kobj, KOBJ_ADD); + } cdev = sch_get_cdev(sch); rc = ccw_device_add(cdev); if (rc) { diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 8f90e4cea2545faef39920f0762f7355ded6989a..168f7c84edba38067d9b86bbd21ee5adc0d06c25 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -747,7 +747,7 @@ static void zfcp_erp_enqueue_ptp_port(struct zfcp_adapter *adapter) adapter->peer_d_id); if (IS_ERR(port)) /* error or port already attached */ return; - _zfcp_erp_port_reopen(port, 0, "ereptp1"); + zfcp_erp_port_reopen(port, 0, "ereptp1"); } static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action) diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 381846164003129458d4bab4020587f4276c67f6..fdbb0a3dc9b4f27298968c8043844aff83b7f8f2 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -2321,7 +2321,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period, * At some speeds, we only support * ST transfers. */ - if ((syncrate->sxfr_u2 & ST_SXFR) != 0) + if ((syncrate->sxfr_u2 & ST_SXFR) != 0) *ppr_options &= ~MSG_EXT_PPR_DT_REQ; break; } diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 35d54ee1c5c74464526f8b56c25c46efb6c9404e..b172f0a02083491382e8c72d0edc95cc06c131cf 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9962,6 +9962,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->max_devs_supported = ipr_max_devs; if (ioa_cfg->sis64) { + host->max_channel = IPR_MAX_SIS64_BUSES; host->max_id = IPR_MAX_SIS64_TARGETS_PER_BUS; host->max_lun = IPR_MAX_SIS64_LUNS_PER_TARGET; if (ipr_max_devs > IPR_MAX_SIS64_DEVS) @@ -9970,6 +9971,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, + ((sizeof(struct ipr_config_table_entry64) * ioa_cfg->max_devs_supported))); } else { + host->max_channel = IPR_VSET_BUS; host->max_id = IPR_MAX_NUM_TARGETS_PER_BUS; host->max_lun = IPR_MAX_NUM_LUNS_PER_TARGET; if (ipr_max_devs > IPR_MAX_PHYSICAL_DEVS) @@ -9979,7 +9981,6 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, * ioa_cfg->max_devs_supported))); } - host->max_channel = IPR_VSET_BUS; host->unique_id = host->host_no; host->max_cmd_len = IPR_MAX_CDB_LEN; host->can_queue = ioa_cfg->max_cmds; diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 085e6c90f9e6a3255469757c2d6febbaa0d66e25..89b36987ff309f6fa08e410d1d918628281445ea 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1306,6 +1306,7 @@ struct ipr_resource_entry { #define IPR_ARRAY_VIRTUAL_BUS 0x1 #define IPR_VSET_VIRTUAL_BUS 0x2 #define IPR_IOAFP_VIRTUAL_BUS 0x3 +#define IPR_MAX_SIS64_BUSES 0x4 #define IPR_GET_RES_PHYS_LOC(res) \ (((res)->bus << 24) | ((res)->target << 8) | (res)->lun) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 7e3a77d3c6f018d62a2fd5b196c1373155bf0f33..e3ca16043f9af459d94c7cd5ece0360c0b045594 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -890,6 +890,10 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + struct iscsi_session *session = cls_session->dd_data; + 
+ if (WARN_ON_ONCE(session->leadconn)) + return; iscsi_tcp_r2tpool_free(cls_session->dd_data); iscsi_session_teardown(cls_session); diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index bb9c1c01664390a31ddb7fd225825467bf95f134..28b50ab2fbb015e856e3595d862f4927b925363b 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -652,6 +652,8 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); } /** diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index af937b91765e60d1cd8f1f8c9c18e4d5b2725b63..fcf4b4175d771a4711154f4c8ee55ba1f1fd2aeb 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1591,8 +1591,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Declare and initialization an instance of the FC NVME template. */ static struct nvme_fc_port_template lpfc_nvme_template = { - .module = THIS_MODULE, - /* initiator-based functions */ .localport_delete = lpfc_nvme_localport_delete, .remoteport_delete = lpfc_nvme_remoteport_delete, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d8e0ba68879c3ca162cf8ef4d992f60e75d78038..480d2d467f7a68d906b6f8e052e14b975128a186 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2271,6 +2271,8 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) !pmb->u.mb.mbxStatus) { rpi = pmb->u.mb.un.varWords[0]; vpi = pmb->u.mb.un.varRegLogin.vpi; + if (phba->sli_rev == LPFC_SLI_REV4) + vpi -= phba->sli4_hba.max_cfg_param.vpi_base; lpfc_unreg_login(phba, vpi, rpi, pmb); pmb->vport = vport; pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 5fb87cfac84dc0c9918eed7c2259faf102d70ab7..ac6870a56401febdfca47cdda4e79b705c2be142 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -8280,8 +8280,8 @@ static void scsih_remove(struct pci_dev *pdev) ioc->remove_host = 1; - mpt3sas_wait_for_commands_to_complete(ioc); - _scsih_flush_running_cmds(ioc); + if (!pci_device_is_present(pdev)) + _scsih_flush_running_cmds(ioc); _scsih_fw_event_cleanup_queue(ioc); @@ -8354,8 +8354,8 @@ scsih_shutdown(struct pci_dev *pdev) ioc->remove_host = 1; - mpt3sas_wait_for_commands_to_complete(ioc); - _scsih_flush_running_cmds(ioc); + if (!pci_device_is_present(pdev)) + _scsih_flush_running_cmds(ioc); _scsih_fw_event_cleanup_queue(ioc); diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 7dceed02123617a12bbf0de47e3517aed0087c55..6b33a1f24f56169a3a17803e5a6952da2e6445a6 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -578,7 +578,6 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) } static struct nvme_fc_port_template qla_nvme_fc_transport = { - .module = THIS_MODULE, .localport_delete = qla_nvme_localport_delete, .remoteport_delete = qla_nvme_remoteport_delete, .create_queue = qla_nvme_alloc_queue, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5f9d4dbc4a98e6806b86a2f8a27e57101498fff3..d4024015f859f1b6183d71c51fca502acc0386e7 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3178,6 +3178,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) base_vha->mgmt_svr_loop_id, host->sg_tablesize); ha->wq = 
alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 0); + if (unlikely(!ha->wq)) { + ret = -ENOMEM; + goto probe_failed; + } if (ha->mqenable) { bool mq = false; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 95d71e301a53420b72f92aee0e2580fed3deb339..9589015234693441f82dcb33e3ff2e7b984ccf5b 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2010,7 +2010,7 @@ static void __iscsi_unbind_session(struct work_struct *work) if (session->target_id == ISCSI_MAX_TARGET) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - return; + goto unbind_session_exit; } target_id = session->target_id; @@ -2022,6 +2022,8 @@ static void __iscsi_unbind_session(struct work_struct *work) ida_simple_remove(&iscsi_sess_ida, target_id); scsi_remove_target(&session->dev); + +unbind_session_exit: iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); } @@ -2945,6 +2947,24 @@ iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev) return err; } +static int iscsi_session_has_conns(int sid) +{ + struct iscsi_cls_conn *conn; + unsigned long flags; + int found = 0; + + spin_lock_irqsave(&connlock, flags); + list_for_each_entry(conn, &connlist, conn_list) { + if (iscsi_conn_get_sid(conn) == sid) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&connlock, flags); + + return found; +} + static int iscsi_set_iface_params(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) @@ -3522,10 +3542,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); - if (session) - transport->destroy_session(session); - else + if (!session) err = -EINVAL; + else if (iscsi_session_has_conns(ev->u.d_session.sid)) + err = -EBUSY; + else + transport->destroy_session(session); break; case ISCSI_UEVENT_UNBIND_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 22025a5818dd010b312521284253169f475d42c2..489737392c37c93d8fc50d30cb3f8b8c0b3f408b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3120,12 +3120,14 @@ static int sd_revalidate_disk(struct gendisk *disk) dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max); - if (sd_validate_opt_xfer_size(sdkp, dev_max)) + if (sd_validate_opt_xfer_size(sdkp, dev_max)) { rw_max = q->limits.io_opt = sdkp->opt_xfer_blocks * sdp->sector_size; - else + } else { + q->limits.io_opt = 0; rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), (sector_t)BLK_DEF_MAX_SECTORS); + } /* Do not exceed controller limit */ rw_max = min(rw_max, queue_max_hw_sectors(q)); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 6dc7f6150c131c00778920e020a3852e0cfc77f2..8d6b13827c3906864c738f127acd75e370045984 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -809,8 +809,10 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, "sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); - if (hp->dxfer_len >= SZ_256M) + if (hp->dxfer_len >= SZ_256M) { + sg_remove_request(sfp, srp); return -EINVAL; + } k = sg_start_req(srp, cmnd); if (k) { diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c93252fc4a502f94100e008b0b01e390d22e104d..f1896b29b3acb549afa81325266d172b66a93374 100644 --- 
a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -950,8 +950,11 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) ufshcd_hex_dump(hba, "UPIU RSP", lrbp->ucd_rsp_ptr, sizeof(struct utp_upiu_rsp)); - prdt_length = le16_to_cpu( - lrbp->utr_descriptor_ptr->prd_table_length); + prdt_length = + le16_to_cpu(lrbp->utr_descriptor_ptr->prd_table_length); + if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) + prdt_length /= hba->sg_entry_size; + dev_err(hba->dev, "UPIU[%d] - PRDT - %d entries phys@0x%llx\n", tag, prdt_length, @@ -959,7 +962,7 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) if (pr_prdt) ufshcd_hex_dump(hba, "UPIU PRDT", lrbp->ucd_prdt_ptr, - sizeof(struct ufshcd_sg_entry) * prdt_length); + hba->sg_entry_size * prdt_length); } } @@ -2184,7 +2187,6 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) hba->clk_gating.active_reqs--; break; } - spin_unlock_irqrestore(hba->host->host_lock, flags); flush_work(&hba->clk_gating.ungate_work); spin_lock_irqsave(hba->host->host_lock, flags); @@ -3277,7 +3279,7 @@ ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) */ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) { - struct ufshcd_sg_entry *prd_table; + struct ufshcd_sg_entry *prd; struct scatterlist *sg; struct scsi_cmnd *cmd; int sg_segments; @@ -3292,21 +3294,22 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16)(sg_segments * - sizeof(struct ufshcd_sg_entry))); + hba->sg_entry_size)); else lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16) (sg_segments)); - prd_table = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; + prd = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; scsi_for_each_sg(cmd, sg, sg_segments, i) { - prd_table[i].size = + prd->size = cpu_to_le32(((u32) sg_dma_len(sg))-1); - prd_table[i].base_addr = + prd->base_addr = cpu_to_le32(lower_32_bits(sg->dma_address)); - prd_table[i].upper_addr = + prd->upper_addr = cpu_to_le32(upper_32_bits(sg->dma_address)); - prd_table[i].reserved = 0; + prd->reserved = 0; + prd = (void *)prd + hba->sg_entry_size; } } else { lrbp->utr_descriptor_ptr->prd_table_length = 0; @@ -4760,7 +4763,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) size_t utmrdl_size, utrdl_size, ucdl_size; /* Allocate memory for UTP command descriptors */ - ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); + ucdl_size = (sizeof_utp_transfer_cmd_desc(hba) * hba->nutrs); hba->ucdl_base_addr = dmam_alloc_coherent(hba->dev, ucdl_size, &hba->ucdl_dma_addr, @@ -4856,7 +4859,7 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table); - cmd_desc_size = sizeof(struct utp_transfer_cmd_desc); + cmd_desc_size = sizeof_utp_transfer_cmd_desc(hba); cmd_desc_dma_addr = hba->ucdl_dma_addr; for (i = 0; i < hba->nutrs; i++) { @@ -4888,17 +4891,17 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) hba->lrb[i].utr_descriptor_ptr = (utrdlp + i); hba->lrb[i].utrd_dma_addr = hba->utrdl_dma_addr + (i * sizeof(struct utp_transfer_req_desc)); - hba->lrb[i].ucd_req_ptr = - (struct utp_upiu_req *)(cmd_descp + i); + hba->lrb[i].ucd_req_ptr = (struct utp_upiu_req *)cmd_descp; hba->lrb[i].ucd_req_dma_addr = cmd_desc_element_addr; hba->lrb[i].ucd_rsp_ptr = - (struct utp_upiu_rsp *)cmd_descp[i].response_upiu; + (struct utp_upiu_rsp 
*)cmd_descp->response_upiu; hba->lrb[i].ucd_rsp_dma_addr = cmd_desc_element_addr + response_offset; hba->lrb[i].ucd_prdt_ptr = - (struct ufshcd_sg_entry *)cmd_descp[i].prd_table; + (struct ufshcd_sg_entry *)cmd_descp->prd_table; hba->lrb[i].ucd_prdt_dma_addr = cmd_desc_element_addr + prdt_offset; + cmd_descp = (void *)cmd_descp + cmd_desc_size; } } @@ -7995,7 +7998,6 @@ static int ufshcd_detect_device(struct ufs_hba *hba) static int ufshcd_reset_and_restore(struct ufs_hba *hba) { int err = 0; - unsigned long flags; int retries = MAX_HOST_RESET_RETRIES; ufshcd_enable_irq(hba); @@ -8011,15 +8013,6 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba) if (err && ufshcd_is_embedded_dev(hba)) BUG(); - /* - * After reset the door-bell might be cleared, complete - * outstanding requests in s/w here. - */ - spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_transfer_req_compl(hba); - ufshcd_tmc_handler(hba); - spin_unlock_irqrestore(hba->host->host_lock, flags); - return err; } @@ -11111,6 +11104,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) hba->host = host; hba->dev = dev; *hba_handle = hba; + hba->sg_entry_size = sizeof(struct ufshcd_sg_entry); INIT_LIST_HEAD(&hba->clk_list_head); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 52f1b21bb58d6454398de6476a76cbc044d5bc2a..e94b04baa3ef25eef1351dea417f67af1142e8d9 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -733,6 +733,7 @@ enum ufshcd_card_state { * @ufs_version: UFS Version to which controller complies * @var: pointer to variant specific data * @priv: pointer to variant specific private data + * @sg_entry_size: size of struct ufshcd_sg_entry (may include variant fields) * @irq: Irq number of the controller * @active_uic_cmd: handle of active UIC command * @uic_cmd_mutex: mutex for uic command @@ -752,6 +753,7 @@ enum ufshcd_card_state { * @uic_error: UFS interconnect layer error status * @saved_err: sticky error mask * @saved_uic_err: sticky UIC error mask + * @silence_err_logs: flag to silence error logs * @dev_cmd: ufs device management command information * @last_dme_cmd_tstamp: time stamp of the last completed DME command * @auto_bkops_enabled: to track whether bkops is enabled in device @@ -816,6 +818,7 @@ struct ufs_hba { u32 ufs_version; struct ufs_hba_variant *var; void *priv; + size_t sg_entry_size; unsigned int irq; bool is_irq_enabled; bool crash_on_err; diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 173cffe4226ddfbda37df0111298b9608973cc74..91f8527649324f635549d0c1bd9f8a9e965a9002 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -411,21 +411,28 @@ struct ufshcd_sg_entry { __le32 upper_addr; __le32 reserved; __le32 size; + /* + * followed by variant-specific fields if + * hba->sg_entry_size != sizeof(struct ufshcd_sg_entry) + */ }; /** * struct utp_transfer_cmd_desc - UFS Command Descriptor structure * @command_upiu: Command UPIU Frame address * @response_upiu: Response UPIU Frame address - * @prd_table: Physical Region Descriptor + * @prd_table: Physical Region Descriptor: an array of SG_ALL struct + * ufshcd_sg_entry's. Variant-specific fields may be present after each. 
*/ struct utp_transfer_cmd_desc { u8 command_upiu[ALIGNED_UPIU_SIZE]; u8 response_upiu[ALIGNED_UPIU_SIZE]; - struct ufshcd_sg_entry prd_table[SG_ALL]; + u8 prd_table[]; }; #define UTRD_CRYPTO_ENABLE UFS_BIT(23) +#define sizeof_utp_transfer_cmd_desc(hba) \ + (sizeof(struct utp_transfer_cmd_desc) + SG_ALL * (hba)->sg_entry_size) /** * struct request_desc_header - Descriptor Header common to both UTRD and UTMRD diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 3a12123de46629d4c0271b8e325449b2eaa94ce6..0e083fe8b893f6ea1769383662caa7fe3e03ba9b 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -97,8 +97,8 @@ static int imx6_pm_domain_power_off(struct generic_pm_domain *genpd) static int imx6_pm_domain_power_on(struct generic_pm_domain *genpd) { struct imx_pm_domain *pd = to_imx_pm_domain(genpd); - int i, ret, sw, sw2iso; - u32 val; + int i, ret; + u32 val, req; if (pd->supply) { ret = regulator_enable(pd->supply); @@ -117,17 +117,18 @@ static int imx6_pm_domain_power_on(struct generic_pm_domain *genpd) regmap_update_bits(pd->regmap, pd->reg_offs + GPC_PGC_CTRL_OFFS, 0x1, 0x1); - /* Read ISO and ISO2SW power up delays */ - regmap_read(pd->regmap, pd->reg_offs + GPC_PGC_PUPSCR_OFFS, &val); - sw = val & 0x3f; - sw2iso = (val >> 8) & 0x3f; - /* Request GPC to power up domain */ - val = BIT(pd->cntr_pdn_bit + 1); - regmap_update_bits(pd->regmap, GPC_CNTR, val, val); + req = BIT(pd->cntr_pdn_bit + 1); + regmap_update_bits(pd->regmap, GPC_CNTR, req, req); - /* Wait ISO + ISO2SW IPG clock cycles */ - udelay(DIV_ROUND_UP(sw + sw2iso, pd->ipg_rate_mhz)); + /* Wait for the PGC to handle the request */ + ret = regmap_read_poll_timeout(pd->regmap, GPC_CNTR, val, !(val & req), + 1, 50); + if (ret) + pr_err("powerup request on domain %s timed out\n", genpd->name); + + /* Wait for reset to propagate through peripherals */ + usleep_range(5, 10); /* Disable reset clocks for all devices in the domain */ for (i = 0; i < pd->num_clks; i++) @@ -329,6 +330,7 @@ static const struct regmap_config imx_gpc_regmap_config = { .rd_table = &access_table, .wr_table = &access_table, .max_register = 0x2ac, + .fast_io = true, }; static struct generic_pm_domain *imx_gpc_onecell_domains[] = { diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c index 5b18f6ffa45c798a9c8d138a4f0fb647551b4932..cd61c883c19f52a8b2e9d4d68a634d3f5fb14510 100644 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c @@ -134,7 +134,7 @@ void __init tegra_init_apbmisc(void) apbmisc.flags = IORESOURCE_MEM; /* strapping options */ - if (tegra_get_chip_id() == TEGRA124) { + if (of_machine_is_compatible("nvidia,tegra124")) { straps.start = 0x7000e864; straps.end = 0x7000e867; } else { diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index b2245cdce230b32c3be06922255bda2f2f0c1b63..5160e16d3a9852ffa7dd462ed66fb8c537bdcd63 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -76,6 +76,10 @@ MODULE_ALIAS("platform:pxa2xx-spi"); #define LPSS_CAPS_CS_EN_SHIFT 9 #define LPSS_CAPS_CS_EN_MASK (0xf << LPSS_CAPS_CS_EN_SHIFT) +#define LPSS_PRIV_CLOCK_GATE 0x38 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK 0x3 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON 0x3 + struct lpss_config { /* LPSS offset from drv_data->ioaddr */ unsigned offset; @@ -92,6 +96,8 @@ struct lpss_config { unsigned cs_sel_shift; unsigned cs_sel_mask; unsigned cs_num; + /* Quirks */ + unsigned cs_clk_stays_gated : 1; }; /* Keep these sorted with enum pxa_ssp_type */ @@ 
-162,6 +168,7 @@ static const struct lpss_config lpss_platforms[] = { .tx_threshold_hi = 56, .cs_sel_shift = 8, .cs_sel_mask = 3 << 8, + .cs_clk_stays_gated = true, }, }; @@ -385,6 +392,22 @@ static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable) else value |= LPSS_CS_CONTROL_CS_HIGH; __lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value); + if (config->cs_clk_stays_gated) { + u32 clkgate; + + /* + * Changing CS alone when dynamic clock gating is on won't + * actually flip CS at that time. This ruins SPI transfers + * that specify delays, or have no data. Toggle the clock mode + * to force on briefly to poke the CS pin to move. + */ + clkgate = __lpss_ssp_read_priv(drv_data, LPSS_PRIV_CLOCK_GATE); + value = (clkgate & ~LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK) | + LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON; + + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, value); + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, clkgate); + } } static void cs_assert(struct driver_data *drv_data) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index 974a8ce58b68bc31e38d717ff16564aec398232e..cb74fd1af2053015796fcd0c7e6055701e063e78 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1190,6 +1190,11 @@ static int spi_qup_suspend(struct device *device) struct spi_qup *controller = spi_master_get_devdata(master); int ret; + if (pm_runtime_suspended(device)) { + ret = spi_qup_pm_resume_runtime(device); + if (ret) + return ret; + } ret = spi_master_suspend(master); if (ret) return ret; @@ -1198,10 +1203,8 @@ static int spi_qup_suspend(struct device *device) if (ret) return ret; - if (!pm_runtime_suspended(device)) { - clk_disable_unprepare(controller->cclk); - clk_disable_unprepare(controller->iclk); - } + clk_disable_unprepare(controller->cclk); + clk_disable_unprepare(controller->iclk); return 0; } diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 18aeaceee2862017f4cd4bb13a5e3c6bd36f6ea7..d26c0eda2d9ea5fd3bb72d280b488a5cced4f557 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -415,9 +415,6 @@ static void zynqmp_qspi_chipselect(struct spi_device *qspi, bool is_high) zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry); - /* Dummy generic FIFO entry */ - zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0); - /* Manually start the generic FIFO command */ zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) | diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index e19e395b0e440622dda2f5acfee98573922d25f1..9f1ec427c168c94bf2a18f1a13fdba86988db4b2 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2603,8 +2603,10 @@ static int comedi_open(struct inode *inode, struct file *file) } cfp = kzalloc(sizeof(*cfp), GFP_KERNEL); - if (!cfp) + if (!cfp) { + comedi_dev_put(dev); return -ENOMEM; + } cfp->dev = dev; diff --git a/drivers/staging/comedi/drivers/dt2815.c b/drivers/staging/comedi/drivers/dt2815.c index ce557197119454f41bf2005f4731edda3d59cdb4..9b773c2e140b29e56c1db89cf959566e695e4b8a 100644 --- a/drivers/staging/comedi/drivers/dt2815.c +++ b/drivers/staging/comedi/drivers/dt2815.c @@ -101,6 +101,7 @@ static int dt2815_ao_insn(struct comedi_device *dev, struct comedi_subdevice *s, int ret; for (i = 0; i < insn->n; i++) { + /* FIXME: lo bit 0 chooses voltage output or current output */ lo = ((data[i] & 0x0f) << 4) | (chan << 1) | 0x01; hi = (data[i] & 0xff0) >> 4; @@ 
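The comedi_open fix above drops the device reference again when the kzalloc of the private data fails, so the error path no longer leaks a reference. A compilable sketch of the pairing, with a toy refcount standing in for comedi_dev_get()/comedi_dev_put():

#include <stdio.h>
#include <stdlib.h>

struct dev { int refcount; };

static void dev_get(struct dev *d) { d->refcount++; }
static void dev_put(struct dev *d) { d->refcount--; }

/* Illustrative open(): every early return after dev_get() must undo it. */
static int open_file(struct dev *d)
{
	void *priv;

	dev_get(d);
	priv = calloc(1, 64);
	if (!priv) {
		dev_put(d);	/* the fix: drop the reference taken above */
		return -1;
	}
	/* ... use priv ... */
	free(priv);
	dev_put(d);
	return 0;
}

int main(void)
{
	struct dev d = { 0 };

	open_file(&d);
	printf("refcount after open/close: %d\n", d.refcount);	/* expect 0 */
	return 0;
}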
-114,6 +115,8 @@ static int dt2815_ao_insn(struct comedi_device *dev, struct comedi_subdevice *s, if (ret) return ret; + outb(hi, dev->iobase + DT2815_DATA); + devpriv->ao_readback[chan] = data[i]; } return i; diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c index aa6508b44fab27f805bcd629c8815a9d0b8bf413..ed7c32542cb3cbd0e38c022e336cfee3b03bac4b 100644 --- a/drivers/staging/greybus/audio_manager.c +++ b/drivers/staging/greybus/audio_manager.c @@ -90,8 +90,8 @@ void gb_audio_manager_remove_all(void) list_for_each_entry_safe(module, next, &modules_list, list) { list_del(&module->list); - kobject_put(&module->kobj); ida_simple_remove(&module_id, module->id); + kobject_put(&module->kobj); } is_empty = list_empty(&modules_list); diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index fbe589fca840845ec8ec21fa81c065e9eab4926e..b6aa70b94f335de9830edae7165fae696f1f684e 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -20,6 +20,7 @@ #include #define MAX_NUM_DEVICES 10 +#define MAX_SYSFS_PREFIX 0x80 #define MAX_SYSFS_PATH 0x200 #define CSV_MAX_LINE 0x1000 #define SYSFS_MAX_INT 0x20 @@ -68,7 +69,7 @@ struct loopback_results { }; struct loopback_device { - char name[MAX_SYSFS_PATH]; + char name[MAX_STR_LEN]; char sysfs_entry[MAX_SYSFS_PATH]; char debugfs_entry[MAX_SYSFS_PATH]; struct loopback_results results; @@ -94,8 +95,8 @@ struct loopback_test { int stop_all; int poll_count; char test_name[MAX_STR_LEN]; - char sysfs_prefix[MAX_SYSFS_PATH]; - char debugfs_prefix[MAX_SYSFS_PATH]; + char sysfs_prefix[MAX_SYSFS_PREFIX]; + char debugfs_prefix[MAX_SYSFS_PREFIX]; struct timespec poll_timeout; struct loopback_device devices[MAX_NUM_DEVICES]; struct loopback_results aggregate_results; @@ -644,7 +645,7 @@ int find_loopback_devices(struct loopback_test *t) static int open_poll_files(struct loopback_test *t) { struct loopback_device *dev; - char buf[MAX_STR_LEN]; + char buf[MAX_SYSFS_PATH + MAX_STR_LEN]; char dummy; int fds_idx = 0; int i; @@ -914,10 +915,10 @@ int main(int argc, char *argv[]) t.iteration_max = atoi(optarg); break; case 'S': - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'D': - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'm': t.mask = atol(optarg); @@ -968,10 +969,10 @@ int main(int argc, char *argv[]) } if (!strcmp(t.sysfs_prefix, "")) - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", sysfs_prefix); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", sysfs_prefix); if (!strcmp(t.debugfs_prefix, "")) - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", debugfs_prefix); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", debugfs_prefix); ret = find_loopback_devices(&t); if (ret) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 446310775e9021bcc7e9f72cb94545e0b9c1b012..184fc05a0f8b7bfab0d8405a25f1242f3b01d8e5 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2051,7 +2051,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) struct ieee_param *param; uint ret = 0; - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto 
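The loopback_test changes above shrink the prefix buffers to MAX_SYSFS_PREFIX and size the poll-path scratch buffer as the sum of its components, so concatenating a prefix and an entry name can never overflow it. A sketch under those assumptions (constants copied from the patch, path contents invented; the snprintf return check is an extra guard shown here, not part of the patch):

#include <stdio.h>

#define MAX_SYSFS_PREFIX 0x80
#define MAX_STR_LEN      32
#define MAX_SYSFS_PATH   0x200

int main(void)
{
	char prefix[MAX_SYSFS_PREFIX] = "/sys/class/gb_loopback/";
	char name[MAX_STR_LEN] = "gb_loopback0";
	/* destination sized as the sum of what may be concatenated into it */
	char path[MAX_SYSFS_PATH + MAX_STR_LEN];
	int n;

	n = snprintf(path, sizeof(path), "%s%s/iteration_count", prefix, name);
	if (n < 0 || (size_t)n >= sizeof(path))
		return 1;	/* would have truncated */
	printf("%s\n", path);
	return 0;
}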
out; } @@ -2856,7 +2856,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) goto out; } - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 5364533585685e46af5513c3499aa5e1aa19488b..30615b8fb657a2dd0230b4419dc680edf74b9c75 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -40,12 +40,14 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { /****** 8188EUS ********/ {USB_DEVICE(0x056e, 0x4008)}, /* Elecom WDC-150SU2M */ {USB_DEVICE(0x07b8, 0x8179)}, /* Abocom - Abocom */ + {USB_DEVICE(0x0B05, 0x18F0)}, /* ASUS USB-N10 Nano B1 */ {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */ + {USB_DEVICE(0x2C4E, 0x0102)}, /* MERCUSYS MW150US v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index d0b317077511ce23d1939617dd575204bce90d84..f92f9073c50769e7e83dbc0b2746a27c23b34b9b 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -486,14 +486,13 @@ int rtl8723bs_xmit_thread(void *context) s32 ret; struct adapter *padapter; struct xmit_priv *pxmitpriv; - u8 thread_name[20] = "RTWHALXT"; - + u8 thread_name[20]; ret = _SUCCESS; padapter = context; pxmitpriv = &padapter->xmitpriv; - rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter)); + rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter)); thread_enter(thread_name); DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter)); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 1b61da61690b680f2514cff7d7bd0d363745cf55..d51f6c452972365e904384a8626ab74d7bdf4ee0 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -3495,7 +3495,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) /* down(&ieee->wx_sem); */ - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } @@ -4340,7 +4340,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */ - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(*param)) { ret = -EINVAL; goto out; } diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c index 56f7be6af1f695a873c9acfe6619d3ec8dafd496..a27f5e9a1ae183f6621035a59662074b8faf7bfc 100644 --- a/drivers/staging/speakup/main.c +++ b/drivers/staging/speakup/main.c @@ -567,7 +567,7 @@ static u_long get_word(struct vc_data *vc) return 0; } else if (tmpx < vc->vc_cols - 2 && (ch == SPACE || ch == 0 || (ch < 0x100 && IS_WDLM(ch))) && - get_char(vc, (u_short *)&tmp_pos + 1, &temp) > SPACE) { + get_char(vc, (u_short *)tmp_pos + 1, &temp) > SPACE) { tmp_pos += 2; 
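The rtl8188eu and rtl8723bs hunks above tighten the ioctl sanity check from "at least as large as struct ieee_param" to "exactly that size", and test the pointer before the length. A userspace sketch of the exact-size validation; the struct layout here is a hypothetical stand-in:

#include <stdio.h>

struct ieee_param_like { unsigned int cmd; unsigned char data[64]; };
struct iw_point_like { void *pointer; unsigned short length; };

/* Reject a NULL buffer and any size other than the expected struct: a
 * short buffer would under-fill the copy, an oversized one signals a
 * mismatched ABI, so the check is made exact. */
static int validate(const struct iw_point_like *p)
{
	if (!p->pointer || p->length != sizeof(struct ieee_param_like))
		return -1;
	return 0;
}

int main(void)
{
	struct ieee_param_like param;
	struct iw_point_like ok  = { &param, sizeof(param) };
	struct iw_point_like bad = { &param, sizeof(param) - 1 };

	printf("exact: %d, short: %d\n", validate(&ok), validate(&bad));
	return 0;
}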
tmpx++; } else diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c index 655f0002f8808a6d4527dd23f35987d8c014bda2..7b73fa2f883404b4b05e2db08af246e4bf31918b 100644 --- a/drivers/staging/vt6656/dpc.c +++ b/drivers/staging/vt6656/dpc.c @@ -140,7 +140,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb, vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm); - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1; + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1; priv->current_rssi = priv->bb_pre_ed_rssi; frame = skb_data + 8; diff --git a/drivers/staging/vt6656/int.c b/drivers/staging/vt6656/int.c index c521729c4192cb500198a3252001d363211f026b..d5d89e83630936d519065921c015e6fdc780bea2 100644 --- a/drivers/staging/vt6656/int.c +++ b/drivers/staging/vt6656/int.c @@ -153,7 +153,8 @@ void vnt_int_process_data(struct vnt_private *priv) priv->wake_up_count = priv->hw->conf.listen_interval; - --priv->wake_up_count; + if (priv->wake_up_count) + --priv->wake_up_count; /* Turn on wake up to listen next beacon */ if (priv->wake_up_count == 1) diff --git a/drivers/staging/vt6656/key.c b/drivers/staging/vt6656/key.c index cc18cb141bff18e8f80437361506d857eb0b8acc..5ecc1a97cb44a0ae21be567c98a7d5d1845a57ec 100644 --- a/drivers/staging/vt6656/key.c +++ b/drivers/staging/vt6656/key.c @@ -91,9 +91,6 @@ static int vnt_set_keymode(struct ieee80211_hw *hw, u8 *mac_addr, case VNT_KEY_PAIRWISE: key_mode |= mode; key_inx = 4; - /* Don't save entry for pairwise key for station mode */ - if (priv->op_mode == NL80211_IFTYPE_STATION) - clear_bit(entry, &priv->key_entry_inuse); break; default: return -EINVAL; @@ -117,7 +114,6 @@ static int vnt_set_keymode(struct ieee80211_hw *hw, u8 *mac_addr, int vnt_set_keys(struct ieee80211_hw *hw, struct ieee80211_sta *sta, struct ieee80211_vif *vif, struct ieee80211_key_conf *key) { - struct ieee80211_bss_conf *conf = &vif->bss_conf; struct vnt_private *priv = hw->priv; u8 *mac_addr = NULL; u8 key_dec_mode = 0; @@ -159,16 +155,12 @@ int vnt_set_keys(struct ieee80211_hw *hw, struct ieee80211_sta *sta, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; } - if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) { + if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) vnt_set_keymode(hw, mac_addr, key, VNT_KEY_PAIRWISE, key_dec_mode, true); - } else { - vnt_set_keymode(hw, mac_addr, key, VNT_KEY_DEFAULTKEY, + else + vnt_set_keymode(hw, mac_addr, key, VNT_KEY_GROUP_ADDRESS, key_dec_mode, true); - vnt_set_keymode(hw, (u8 *)conf->bssid, key, - VNT_KEY_GROUP_ADDRESS, key_dec_mode, true); - } - return 0; } diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index e8ccd800c94fde6a504eec428291f7ab6f61cf4c..9adab851580cab21792c99fbf3a47c1e02ea8b8d 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -594,8 +594,6 @@ static int vnt_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) priv->op_mode = vif->type; - vnt_set_bss_mode(priv); - /* LED blink on TX */ vnt_mac_set_led(priv, LEDSTS_STS, LEDSTS_INTER); @@ -682,7 +680,6 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->basic_rates = conf->basic_rates; vnt_update_top_rates(priv); - vnt_set_bss_mode(priv); dev_dbg(&priv->usb->dev, "basic rates %x\n", conf->basic_rates); } @@ -711,11 +708,14 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->short_slot_time = false; vnt_set_short_slot_time(priv); - vnt_update_ifs(priv); vnt_set_vga_gain_offset(priv, priv->bb_vga[0]); vnt_update_pre_ed_threshold(priv, false); } + if (changed & (BSS_CHANGED_BASIC_RATES | 
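The vt6656 int.c hunk above guards the decrement of wake_up_count: unconditionally decrementing an unsigned zero wraps it to UINT_MAX, and the "== 1" wake-up test below it would then not fire for billions of beacon intervals. Reduced to its core:

#include <stdio.h>

int main(void)
{
	unsigned int wake_up_count = 0;

	/* The guard the patch adds; without it the counter wraps. */
	if (wake_up_count)
		--wake_up_count;

	if (wake_up_count == 1)
		printf("turn on wake-up for the next beacon\n");
	else
		printf("count=%u, no action\n", wake_up_count);
	return 0;
}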
BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT)) + vnt_set_bss_mode(priv); + if (changed & BSS_CHANGED_TXPOWER) vnt_rf_setpower(priv, priv->current_rate, conf->chandef.chan->hw_value); @@ -739,12 +739,15 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, vnt_mac_reg_bits_on(priv, MAC_REG_TFTCTL, TFTCTL_TSFCNTREN); - vnt_adjust_tsf(priv, conf->beacon_rate->hw_value, - conf->sync_tsf, priv->current_tsf); - vnt_mac_set_beacon_interval(priv, conf->beacon_int); vnt_reset_next_tbtt(priv, conf->beacon_int); + + vnt_adjust_tsf(priv, conf->beacon_rate->hw_value, + conf->sync_tsf, priv->current_tsf); + + vnt_update_next_tbtt(priv, + conf->sync_tsf, conf->beacon_int); } else { vnt_clear_current_tsf(priv); @@ -779,15 +782,11 @@ static void vnt_configure(struct ieee80211_hw *hw, { struct vnt_private *priv = hw->priv; u8 rx_mode = 0; - int rc; *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - rc = vnt_control_in(priv, MESSAGE_TYPE_READ, MAC_REG_RCR, - MESSAGE_REQUEST_MACREG, sizeof(u8), &rx_mode); - - if (!rc) - rx_mode = RCR_MULTICAST | RCR_BROADCAST; + vnt_control_in(priv, MESSAGE_TYPE_READ, MAC_REG_RCR, + MESSAGE_REQUEST_MACREG, sizeof(u8), &rx_mode); dev_dbg(&priv->usb->dev, "rx mode in = %x\n", rx_mode); @@ -828,8 +827,12 @@ static int vnt_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, return -EOPNOTSUPP; break; case DISABLE_KEY: - if (test_bit(key->hw_key_idx, &priv->key_entry_inuse)) + if (test_bit(key->hw_key_idx, &priv->key_entry_inuse)) { clear_bit(key->hw_key_idx, &priv->key_entry_inuse); + + vnt_mac_disable_keyentry(priv, key->hw_key_idx); + } + default: break; } diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c index 2a22e448a2b3b49779446e8417ebaadcbf259874..fb1a76c4c9271fda2453a798fe57502a92aed210 100644 --- a/drivers/staging/wlan-ng/hfa384x_usb.c +++ b/drivers/staging/wlan-ng/hfa384x_usb.c @@ -3495,6 +3495,8 @@ static void hfa384x_int_rxmonitor(struct wlandevice *wlandev, WLAN_HDR_A4_LEN + WLAN_DATA_MAXLEN + WLAN_CRC_LEN)) { pr_debug("overlen frm: len=%zd\n", skblen - sizeof(struct p80211_caphdr)); + + return; } skb = dev_alloc_skb(skblen); diff --git a/drivers/staging/wlan-ng/prism2usb.c b/drivers/staging/wlan-ng/prism2usb.c index b5ba176004c1f88aa1e0cfc5f127bc3fa06586b8..d8d86761b790ae07744525e279c09084a2e85b3a 100644 --- a/drivers/staging/wlan-ng/prism2usb.c +++ b/drivers/staging/wlan-ng/prism2usb.c @@ -180,6 +180,7 @@ static void prism2sta_disconnect_usb(struct usb_interface *interface) cancel_work_sync(&hw->link_bh); cancel_work_sync(&hw->commsqual_bh); + cancel_work_sync(&hw->usb_work); /* Now we complete any outstanding commands * and tell everyone who is waiting for their diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 37d64acea5e1ea166c6f78c8ce24fd33cf0495d5..ee49b227dc12b6ca04b4831427c61c2d19bdb2c5 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1158,9 +1158,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -2006,9 +2004,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, 
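The vnt_bss_info_changed rework above removes the scattered vnt_set_bss_mode() calls and re-issues the reprogramming once, keyed off the union of the BSS_CHANGED_* bits that affect it. The control-flow pattern, with invented flag names:

#include <stdio.h>

#define CH_BASIC_RATES  (1u << 0)	/* invented stand-ins */
#define CH_ERP_PREAMBLE (1u << 1)
#define CH_ERP_SLOT     (1u << 2)
#define CH_TXPOWER      (1u << 3)

static void set_bss_mode(void) { printf("reprogram BSS mode once\n"); }

int main(void)
{
	unsigned int changed = CH_BASIC_RATES | CH_ERP_SLOT;

	/* Instead of calling set_bss_mode() from each branch that might
	 * affect it, test the union of the relevant change bits once. */
	if (changed & (CH_BASIC_RATES | CH_ERP_PREAMBLE | CH_ERP_SLOT))
		set_bss_mode();
	return 0;
}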
cmd->sense_buffer + 2); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -4155,6 +4151,9 @@ int iscsit_close_connection( iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4237,11 +4236,6 @@ int iscsit_close_connection( * must wait until they have completed. */ iscsit_check_conn_usage_count(conn); - target_sess_cmd_list_set_waiting(sess->se_sess); - target_wait_for_sess_cmds(sess->se_sess); - - if (conn->conn_transport->iscsit_wait_conn) - conn->conn_transport->iscsit_wait_conn(conn); ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { @@ -4320,30 +4314,37 @@ int iscsit_close_connection( if (!atomic_read(&sess->session_reinstatement) && atomic_read(&sess->session_fall_back_to_erl0)) { spin_unlock_bh(&sess->conn_lock); + complete_all(&sess->session_wait_comp); iscsit_close_session(sess); return 0; } else if (atomic_read(&sess->session_logout)) { pr_debug("Moving to TARG_SESS_STATE_FREE.\n"); sess->session_state = TARG_SESS_STATE_FREE; - spin_unlock_bh(&sess->conn_lock); - if (atomic_read(&sess->sleep_on_sess_wait_comp)) - complete(&sess->session_wait_comp); + if (atomic_read(&sess->session_close)) { + spin_unlock_bh(&sess->conn_lock); + complete_all(&sess->session_wait_comp); + iscsit_close_session(sess); + } else { + spin_unlock_bh(&sess->conn_lock); + } return 0; } else { pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); sess->session_state = TARG_SESS_STATE_FAILED; - if (!atomic_read(&sess->session_continuation)) { - spin_unlock_bh(&sess->conn_lock); + if (!atomic_read(&sess->session_continuation)) iscsit_start_time2retain_handler(sess); - } else - spin_unlock_bh(&sess->conn_lock); - if (atomic_read(&sess->sleep_on_sess_wait_comp)) - complete(&sess->session_wait_comp); + if (atomic_read(&sess->session_close)) { + spin_unlock_bh(&sess->conn_lock); + complete_all(&sess->session_wait_comp); + iscsit_close_session(sess); + } else { + spin_unlock_bh(&sess->conn_lock); + } return 0; } @@ -4452,9 +4453,9 @@ static void iscsit_logout_post_handler_closesession( complete(&conn->conn_logout_comp); iscsit_dec_conn_usage_count(conn); + atomic_set(&sess->session_close, 1); iscsit_stop_session(sess, sleep, sleep); iscsit_dec_session_usage_count(sess); - iscsit_close_session(sess); } static void iscsit_logout_post_handler_samecid( @@ -4589,49 +4590,6 @@ void iscsit_fail_session(struct iscsi_session *sess) sess->session_state = TARG_SESS_STATE_FAILED; } -int iscsit_free_session(struct iscsi_session *sess) -{ - u16 conn_count = atomic_read(&sess->nconn); - struct iscsi_conn *conn, *conn_tmp = NULL; - int is_last; - - spin_lock_bh(&sess->conn_lock); - atomic_set(&sess->sleep_on_sess_wait_comp, 1); - - list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, - conn_list) { - if (conn_count == 0) - break; - - if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) { - is_last = 1; - } else { - iscsit_inc_conn_usage_count(conn_tmp); - is_last = 0; - } - iscsit_inc_conn_usage_count(conn); - - spin_unlock_bh(&sess->conn_lock); - iscsit_cause_connection_reinstatement(conn, 1); - spin_lock_bh(&sess->conn_lock); - - iscsit_dec_conn_usage_count(conn); - if (is_last == 
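The iSCSI target hunks above introduce a session_close flag so that exactly one of the competing teardown paths (logout handler, connection close, TPG release) ends up freeing the session. The driver sets and reads the flag with kernel atomics under conn_lock; the sketch below compresses the close-once idea into a single C11 atomic exchange, which is a simplification, not the driver's locking scheme:

#include <stdio.h>
#include <stdatomic.h>

static atomic_int session_close;

/* First caller to set the flag owns the teardown; later callers skip it. */
static void maybe_close(const char *who)
{
	if (atomic_exchange(&session_close, 1) == 0)
		printf("%s: closing the session\n", who);
	else
		printf("%s: close already in progress, skipping\n", who);
}

int main(void)
{
	maybe_close("logout handler");
	maybe_close("connection teardown");
	return 0;
}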
0) - iscsit_dec_conn_usage_count(conn_tmp); - - conn_count--; - } - - if (atomic_read(&sess->nconn)) { - spin_unlock_bh(&sess->conn_lock); - wait_for_completion(&sess->session_wait_comp); - } else - spin_unlock_bh(&sess->conn_lock); - - iscsit_close_session(sess); - return 0; -} - void iscsit_stop_session( struct iscsi_session *sess, int session_sleep, @@ -4642,8 +4600,6 @@ void iscsit_stop_session( int is_last; spin_lock_bh(&sess->conn_lock); - if (session_sleep) - atomic_set(&sess->sleep_on_sess_wait_comp, 1); if (connection_sleep) { list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, @@ -4701,12 +4657,15 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) spin_lock(&sess->conn_lock); if (atomic_read(&sess->session_fall_back_to_erl0) || atomic_read(&sess->session_logout) || + atomic_read(&sess->session_close) || (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess->conn_lock); continue; } + iscsit_inc_session_usage_count(sess); atomic_set(&sess->session_reinstatement, 1); atomic_set(&sess->session_fall_back_to_erl0, 1); + atomic_set(&sess->session_close, 1); spin_unlock(&sess->conn_lock); list_move_tail(&se_sess->sess_list, &free_list); @@ -4716,7 +4675,9 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) list_for_each_entry_safe(se_sess, se_sess_tmp, &free_list, sess_list) { sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; - iscsit_free_session(sess); + list_del_init(&se_sess->sess_list); + iscsit_stop_session(sess, 1, 1); + iscsit_dec_session_usage_count(sess); session_count++; } diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 42de1843aa40d06f9967693ab5dbf20852e84629..f0d2cbf594c92ea2adee3e98febe8b4b52310149 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -43,7 +43,6 @@ extern int iscsi_target_rx_thread(void *); extern int iscsit_close_connection(struct iscsi_conn *); extern int iscsit_close_session(struct iscsi_session *); extern void iscsit_fail_session(struct iscsi_session *); -extern int iscsit_free_session(struct iscsi_session *); extern void iscsit_stop_session(struct iscsi_session *, int, int); extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 0ebc4818e132ade606a77e8e46b46e183e111ddd..4191e4a8a9ed64e3ac37aff1a9a659a6125c2666 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1503,20 +1503,23 @@ static void lio_tpg_close_session(struct se_session *se_sess) spin_lock(&sess->conn_lock); if (atomic_read(&sess->session_fall_back_to_erl0) || atomic_read(&sess->session_logout) || + atomic_read(&sess->session_close) || (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess->conn_lock); spin_unlock_bh(&se_tpg->session_lock); return; } + iscsit_inc_session_usage_count(sess); atomic_set(&sess->session_reinstatement, 1); atomic_set(&sess->session_fall_back_to_erl0, 1); + atomic_set(&sess->session_close, 1); spin_unlock(&sess->conn_lock); iscsit_stop_time2retain_timer(sess); spin_unlock_bh(&se_tpg->session_lock); iscsit_stop_session(sess, 1, 1); - iscsit_close_session(sess); + iscsit_dec_session_usage_count(sess); } static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 
27893d90c4efa992aadc7122b6c3ff00a478ed1f..55df6f99e66917ed74f9f05bc22be6ac81904d76 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -199,6 +199,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) spin_lock(&sess_p->conn_lock); if (atomic_read(&sess_p->session_fall_back_to_erl0) || atomic_read(&sess_p->session_logout) || + atomic_read(&sess_p->session_close) || (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess_p->conn_lock); continue; @@ -209,6 +210,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) (sess_p->sess_ops->SessionType == sessiontype))) { atomic_set(&sess_p->session_reinstatement, 1); atomic_set(&sess_p->session_fall_back_to_erl0, 1); + atomic_set(&sess_p->session_close, 1); spin_unlock(&sess_p->conn_lock); iscsit_inc_session_usage_count(sess_p); iscsit_stop_time2retain_timer(sess_p); @@ -233,7 +235,6 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) if (sess->session_state == TARG_SESS_STATE_FAILED) { spin_unlock_bh(&sess->conn_lock); iscsit_dec_session_usage_count(sess); - iscsit_close_session(sess); return 0; } spin_unlock_bh(&sess->conn_lock); @@ -241,7 +242,6 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) iscsit_stop_session(sess, 1, 1); iscsit_dec_session_usage_count(sess); - iscsit_close_session(sess); return 0; } @@ -534,6 +534,7 @@ static int iscsi_login_non_zero_tsih_s2( sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr; if (atomic_read(&sess_p->session_fall_back_to_erl0) || atomic_read(&sess_p->session_logout) || + atomic_read(&sess_p->session_close) || (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) continue; if (!memcmp(sess_p->isid, pdu->isid, 6) && diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 95aa47ac4dcd2b923a996e709611d0fd52469718..f8621fe6737671341067d112650b2d75f5b0b850 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -76,7 +76,7 @@ static int fc_get_pr_transport_id( * encoded TransportID. 
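The target_core_fabric_lib.c hunk that follows lowers the scan bound from 24 to 23. Each iteration that is not skipping a ':' consumes two hex digits, so a bound of 24 lets the final pass start on the string's terminating NUL and read one byte beyond it. A self-contained re-creation of the corrected loop; the parsing is approximated with sscanf rather than the kernel's hex2bin:

#include <stdio.h>

int main(void)
{
	/* A FC WWPN rendered as text: 8 hex byte pairs, 7 colons, 23 chars. */
	const char *wwpn = "21:00:00:e0:8b:05:05:04";
	unsigned char out[8];
	int i = 0, j = 0;

	/* Bound at 23, not 24: a bound of 24 would let a final pass start
	 * on the terminating NUL and read one byte past the string. */
	while (i < 23 && j < (int)sizeof(out)) {
		if (wwpn[i] == ':') {
			i++;
			continue;
		}
		if (sscanf(&wwpn[i], "%2hhx", &out[j]) != 1)
			break;
		j++;
		i += 2;
	}
	printf("parsed %d bytes, first=0x%02x\n", j, (unsigned int)out[0]);
	return 0;
}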
*/ ptr = &se_nacl->initiatorname[0]; - for (i = 0; i < 24; ) { + for (i = 0; i < 23; ) { if (!strncmp(&ptr[i], ":", 1)) { i++; continue; diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 60429011292a2c4c2fa4104a90414ae948f956a7..2a9e023f542919df1b84d0c1a53ecacf8d1c4300 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -447,7 +447,7 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd) target_to_linux_sector(dev, cmd->t_task_lba), target_to_linux_sector(dev, sbc_get_write_same_sectors(cmd)), - GFP_KERNEL, false); + GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); if (ret) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index fe2384b019ec95f531718502579bb0197158591e..9cfc65ca173db5d04fe015e68c2e64fde0810186 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -240,6 +240,12 @@ static int tb_switch_nvm_read(void *priv, unsigned int offset, void *val, return dma_port_flash_read(sw->dma_port, offset, val, bytes); } +static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val, + size_t bytes) +{ + return -EPERM; +} + static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -285,6 +291,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id, config.read_only = true; } else { config.name = "nvm_non_active"; + config.reg_read = tb_switch_nvm_no_read; config.reg_write = tb_switch_nvm_write; config.root_only = true; } diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index a1c7125cb968fdcbe9f69a5c29b306adff000b66..5a348efb91adae6d065f7735e76967d3b9ba8cc1 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -139,6 +139,21 @@ static int find_console_handle(void) return 1; } +static unsigned int local_ev_byte_channel_send(unsigned int handle, + unsigned int *count, + const char *p) +{ + char buffer[EV_BYTE_CHANNEL_MAX_BYTES]; + unsigned int c = *count; + + if (c < sizeof(buffer)) { + memcpy(buffer, p, c); + memset(&buffer[c], 0, sizeof(buffer) - c); + p = buffer; + } + return ev_byte_channel_send(handle, count, p); +} + /*************************** EARLY CONSOLE DRIVER ***************************/ #ifdef CONFIG_PPC_EARLY_DEBUG_EHV_BC @@ -157,7 +172,7 @@ static void byte_channel_spin_send(const char data) do { count = 1; - ret = ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, + ret = local_ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, &count, &data); } while (ret == EV_EAGAIN); } @@ -224,7 +239,7 @@ static int ehv_bc_console_byte_channel_send(unsigned int handle, const char *s, while (count) { len = min_t(unsigned int, count, EV_BYTE_CHANNEL_MAX_BYTES); do { - ret = ev_byte_channel_send(handle, &len, s); + ret = local_ev_byte_channel_send(handle, &len, s); } while (ret == EV_EAGAIN); count -= len; s += len; @@ -404,7 +419,7 @@ static void ehv_bc_tx_dequeue(struct ehv_bc_data *bc) CIRC_CNT_TO_END(bc->head, bc->tail, BUF_SIZE), EV_BYTE_CHANNEL_MAX_BYTES); - ret = ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); + ret = local_ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); /* 'len' is valid only if the return code is 0 or EV_EAGAIN */ if (!ret || (ret == EV_EAGAIN)) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index a8d399188242efe428bb579f845b716396465d9f..fc0ef13f2616a241c285f2fbb0a142e309f8d39e 100644 --- 
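The ehv_bytechan wrapper above exists because the hypervisor byte-channel call always reads EV_BYTE_CHANNEL_MAX_BYTES from the source buffer even when count is smaller, so short writes are staged through a zero-padded bounce buffer to keep the hypercall from reading past the caller's allocation. The same staging, minus the hypercall; MAX_BYTES is an arbitrary stand-in:

#include <stdio.h>
#include <string.h>

#define MAX_BYTES 16	/* stands in for EV_BYTE_CHANNEL_MAX_BYTES */

/* The consumer always reads MAX_BYTES from 'p', so short buffers are
 * staged through a zero-padded bounce buffer first. */
static int send_bytes(unsigned int *count, const char *p)
{
	char bounce[MAX_BYTES];
	unsigned int c = *count;

	if (c < sizeof(bounce)) {
		memcpy(bounce, p, c);
		memset(&bounce[c], 0, sizeof(bounce) - c);
		p = bounce;
	}
	/* a real implementation would hand 'p' to the hypercall here */
	printf("sending %u byte(s)%s\n", c,
	       p == bounce ? " via bounce buffer" : "");
	return 0;
}

int main(void)
{
	unsigned int n = 3;

	return send_bytes(&n, "abc");
}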
a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -288,10 +288,6 @@ int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops) vtermnos[index] = vtermno; cons_ops[index] = ops; - /* reserve all indices up to and including this index */ - if (last_hvc < index) - last_hvc = index; - /* check if we need to re-register the kernel console */ hvc_check_console(index); @@ -895,13 +891,22 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, cons_ops[i] == hp->ops) break; - /* no matching slot, just use a counter */ - if (i >= MAX_NR_HVC_CONSOLES) - i = ++last_hvc; + if (i >= MAX_NR_HVC_CONSOLES) { + + /* find 'empty' slot for console */ + for (i = 0; i < MAX_NR_HVC_CONSOLES && vtermnos[i] != -1; i++) { + } + + /* no matching slot, just use a counter */ + if (i == MAX_NR_HVC_CONSOLES) + i = ++last_hvc + MAX_NR_HVC_CONSOLES; + } hp->index = i; - cons_ops[i] = ops; - vtermnos[i] = vtermno; + if (i < MAX_NR_HVC_CONSOLES) { + cons_ops[i] = ops; + vtermnos[i] = vtermno; + } list_add_tail(&(hp->next), &hvc_structs); spin_unlock(&hvc_structs_lock); diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 32943afacffd1042c1519695fc8367333342abba..1081810b3e3fed77411958ee559eb2888791031d 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -645,18 +645,21 @@ init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev) tty_port_init(&info->port); info->port.ops = &rocket_port_ops; info->flags &= ~ROCKET_MODE_MASK; - switch (pc104[board][line]) { - case 422: - info->flags |= ROCKET_MODE_RS422; - break; - case 485: - info->flags |= ROCKET_MODE_RS485; - break; - case 232: - default: + if (board < ARRAY_SIZE(pc104) && line < ARRAY_SIZE(pc104_1)) + switch (pc104[board][line]) { + case 422: + info->flags |= ROCKET_MODE_RS422; + break; + case 485: + info->flags |= ROCKET_MODE_RS485; + break; + case 232: + default: + info->flags |= ROCKET_MODE_RS232; + break; + } + else info->flags |= ROCKET_MODE_RS232; - break; - } info->intmask = RXF_TRIG | TXFIFO_MT | SRC_INT | DELTA_CD | DELTA_CTS | DELTA_DSR; if (sInitChan(ctlp, &info->channel, aiop, chan) == 0) { diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index f513107b9ea0b6a309a03c13ccbaeb5d73332d94..e6c9ff65402aecec38a38dcd2fe975ad97845d27 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -452,16 +452,18 @@ static int platform_serdev_register_devices(struct serdev_controller *ctrl) return err; } + /** - * serdev_controller_add() - Add an serdev controller + * serdev_controller_add_platform() - Add an serdev controller * @ctrl: controller to be registered. + * @platform: whether to permit fallthrough to platform device probe * * Register a controller previously allocated via serdev_controller_alloc() with - * the serdev core. + * the serdev core. Optionally permit probing via a platform device fallback. 
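The rocket.c hunk above stops indexing the pc104 mode table with unvalidated board/line values taken from module parameters, falling back to RS-232 when either index is out of range. A sketch of the bounds check before the lookup, with a shrunken stand-in table:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* shrunken stand-in for the driver's pc104 mode table */
static const int pc104[2][4] = {
	{ 422, 485, 232, 232 },
	{ 232, 232, 232, 232 },
};

int main(void)
{
	unsigned int board = 7, line = 2;	/* board is out of range */
	int mode = 232;				/* RS-232 default */

	/* Index the table only when both indices are in bounds. */
	if (board < ARRAY_SIZE(pc104) && line < ARRAY_SIZE(pc104[0]))
		mode = pc104[board][line];

	printf("board %u line %u -> RS-%d\n", board, line, mode);
	return 0;
}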
*/ -int serdev_controller_add(struct serdev_controller *ctrl) +int serdev_controller_add_platform(struct serdev_controller *ctrl, bool platform) { - int ret_of, ret_platform, ret; + int ret, ret_of, ret_platform = -ENODEV; /* Can't register until after driver model init */ if (WARN_ON(!is_registered)) @@ -471,8 +473,9 @@ int serdev_controller_add(struct serdev_controller *ctrl) if (ret) return ret; - ret_platform = platform_serdev_register_devices(ctrl); ret_of = of_serdev_register_devices(ctrl); + if (platform) + ret_platform = platform_serdev_register_devices(ctrl); if (ret_of && ret_platform) { dev_dbg(&ctrl->dev, "no devices registered: of:%d " "platform:%d\n", @@ -489,7 +492,7 @@ int serdev_controller_add(struct serdev_controller *ctrl) device_del(&ctrl->dev); return ret; }; -EXPORT_SYMBOL_GPL(serdev_controller_add); +EXPORT_SYMBOL_GPL(serdev_controller_add_platform); /* Remove a device associated with a controller */ static int serdev_remove_device(struct device *dev, void *data) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 69fc6d9ab490f90cea6b5e67b10e0b41f66d5726..0669e18ff8796b43810124cbbc85cd0ead8a3a54 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -15,9 +15,15 @@ #include #include #include +#include +#include #define SERPORT_ACTIVE 1 +static char *pdev_tty_port; +module_param(pdev_tty_port, charp, 0644); +MODULE_PARM_DESC(pdev_tty_port, "platform device tty port to claim"); + struct serport { struct tty_port *port; struct tty_struct *tty; @@ -238,9 +244,9 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { - const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; + bool platform = false; int ret; if (!port || !drv || !parent) @@ -257,11 +263,27 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; - old_ops = port->client_ops; port->client_ops = &client_ops; port->client_data = ctrl; - ret = serdev_controller_add(ctrl); + /* There is not always a way to bind specific platform devices because + * they may be defined on platforms without DT or ACPI. When dealing + * with a platform device, do not allow direct binding unless it is + * whitelisted by module parameter. If a platform device is otherwise + * described by DT or ACPI it will still be bound and this check will + * be ignored. 
+ */ + if (parent->bus == &platform_bus_type) { + char tty_port_name[7]; + + sprintf(tty_port_name, "%s%d", drv->name, idx); + if (pdev_tty_port && + !strcmp(pdev_tty_port, tty_port_name)) { + platform = true; + } + } + + ret = serdev_controller_add_platform(ctrl, platform); if (ret) goto err_reset_data; @@ -270,7 +292,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, err_reset_data: port->client_data = NULL; - port->client_ops = old_ops; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return ERR_PTR(ret); @@ -285,8 +307,8 @@ int serdev_tty_port_unregister(struct tty_port *port) return -ENODEV; serdev_controller_remove(ctrl); - port->client_ops = NULL; port->client_data = NULL; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return 0; diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 33a801353114c016ca7fe5fa0aebe507a3d2844b..0a89df390f248ec5209bb64b706d5ca82d72d826 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -256,7 +256,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.line = rc; port.port.irq = irq_of_parse_and_map(np, 0); - port.port.irqflags = IRQF_SHARED; port.port.iotype = UPIO_MEM; port.port.type = PORT_16550A; port.port.uartclk = clk; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index c698ebab6d3bd5fa12b87fbdd83575b306227fdd..5017a0f46b826574e90c30fb94535b8de6b18742 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -181,7 +181,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up) struct hlist_head *h; struct hlist_node *n; struct irq_info *i; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? 
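The serdev-ttyport change above builds a "ttyS<n>"-style name and allows platform-device binding only when it matches the pdev_tty_port module parameter. A userspace approximation of the match; snprintf is used here for safety where the patch writes with sprintf into a 7-byte buffer, and the parameter is a fixed string in this sketch:

#include <stdio.h>
#include <string.h>

/* stands in for the pdev_tty_port module parameter */
static const char *pdev_tty_port = "ttyS1";

int main(void)
{
	const char *drv_name = "ttyS";	/* tty driver name */
	int idx = 1;			/* port index */
	char tty_port_name[7];
	int platform = 0;

	snprintf(tty_port_name, sizeof(tty_port_name), "%s%d", drv_name, idx);
	if (pdev_tty_port && !strcmp(pdev_tty_port, tty_port_name))
		platform = 1;	/* whitelisted: allow platform binding */

	printf("%s -> platform binding %s\n",
	       tty_port_name, platform ? "allowed" : "denied");
	return 0;
}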
IRQF_SHARED : 0; + int ret; mutex_lock(&hash_mutex); @@ -216,9 +216,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up) INIT_LIST_HEAD(&up->list); i->head = &up->list; spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, up->port.name, i); + up->port.irqflags, up->port.name, i); if (ret < 0) serial_do_unlink(i, up); } diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 411b4b03457bbbd7bac5c1f2ed542e81ebda47e8..899f36b59af798c085deb70daedf7e37ae1678cb 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -27,6 +27,14 @@ #include "8250.h" +#define PCI_DEVICE_ID_ACCES_COM_2S 0x1052 +#define PCI_DEVICE_ID_ACCES_COM_4S 0x105d +#define PCI_DEVICE_ID_ACCES_COM_8S 0x106c +#define PCI_DEVICE_ID_ACCES_COM232_8 0x10a8 +#define PCI_DEVICE_ID_ACCES_COM_2SM 0x10d2 +#define PCI_DEVICE_ID_ACCES_COM_4SM 0x10db +#define PCI_DEVICE_ID_ACCES_COM_8SM 0x10ea + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -562,6 +570,22 @@ static int __maybe_unused exar_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume); +static const struct exar8250_board acces_com_2x = { + .num_ports = 2, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_4x = { + .num_ports = 4, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_8x = { + .num_ports = 8, + .setup = pci_xr17c154_setup, +}; + + static const struct exar8250_board pbn_fastcom335_2 = { .num_ports = 2, .setup = pci_fastcom335_setup, @@ -632,6 +656,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = { } static const struct pci_device_id exar_pci_tbl[] = { + EXAR_DEVICE(ACCESSIO, ACCES_COM_2S, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4S, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8S, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM232_8, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_2SM, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4SM, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8SM, acces_com_8x), + + CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect), CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect), CONNECT_DEVICE(XR17C158, UART_8_232, pbn_connect), diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index a73d2bc4b6852c9a1b2523a0ad27524fc7984fde..90a93c001e169bc350d909df458c31522883e168 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2258,6 +2258,10 @@ int serial8250_do_startup(struct uart_port *port) } } + /* Check if we need to have shared IRQs */ + if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) + up->port.irqflags |= IRQF_SHARED; + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; /* diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index ed545a61413c045df92a11c0cbc188e03cdb2926..ac56a5131a9ccee0ed2aa07c82aac2ed2509936e 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -289,6 +289,10 @@ static void ar933x_uart_set_termios(struct uart_port *port, ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* reenable the UART */ 
ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, @@ -421,6 +425,10 @@ static int ar933x_uart_startup(struct uart_port *port) ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* Enable RX interrupts */ up->ier = AR933X_UART_INT_RX_VALID; ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 367ce812743e22d6fedd538c14a17d147b6e8448..a00227d312d3f74afed6ed3953044f983224eb31 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -498,7 +498,8 @@ static void atmel_stop_tx(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); if (atmel_uart_is_half_duplex(port)) - atmel_start_rx(port); + if (!atomic_read(&atmel_port->tasklet_shutdown)) + atmel_start_rx(port); } diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a81a5be0cf7a2a6740806b13d449d5c86630e0c4..630065b551f5f1ea57ddc687c62147fec54d9894 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -80,7 +80,7 @@ #define UCR1_IDEN (1<<12) /* Idle condition interrupt */ #define UCR1_ICD_REG(x) (((x) & 3) << 10) /* idle condition detect */ #define UCR1_RRDYEN (1<<9) /* Recv ready interrupt enable */ -#define UCR1_RDMAEN (1<<8) /* Recv ready DMA enable */ +#define UCR1_RXDMAEN (1<<8) /* Recv ready DMA enable */ #define UCR1_IREN (1<<7) /* Infrared interface enable */ #define UCR1_TXMPTYEN (1<<6) /* Transimitter empty interrupt enable */ #define UCR1_RTSDEN (1<<5) /* RTS delta interrupt enable */ @@ -352,6 +352,30 @@ static void imx_port_rts_auto(struct imx_port *sport, unsigned long *ucr2) *ucr2 |= UCR2_CTSC; } +/* + * interrupts disabled on entry + */ +static void imx_start_rx(struct uart_port *port) +{ + struct imx_port *sport = (struct imx_port *)port; + unsigned int ucr1, ucr2; + + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); + + ucr2 |= UCR2_RXEN; + + if (sport->dma_is_enabled) { + ucr1 |= UCR1_RXDMAEN | UCR1_ATDMAEN; + } else { + ucr1 |= UCR1_RRDYEN; + } + + /* Write UCR2 first as it includes RXEN */ + writel(ucr2, port->membase + UCR2); + writel(ucr1, port->membase + UCR1); +} + /* * interrupts disabled on entry */ @@ -378,9 +402,10 @@ static void imx_stop_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - temp |= UCR2_RXEN; writel(temp, port->membase + UCR2); + imx_start_rx(port); + temp = readl(port->membase + UCR4); temp &= ~UCR4_TCEN; writel(temp, port->membase + UCR4); @@ -393,7 +418,7 @@ static void imx_stop_tx(struct uart_port *port) static void imx_stop_rx(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; - unsigned long temp; + unsigned long ucr1, ucr2; if (sport->dma_is_enabled && sport->dma_is_rxing) { if (sport->port.suspended) { @@ -404,12 +429,18 @@ static void imx_stop_rx(struct uart_port *port) } } - temp = readl(sport->port.membase + UCR2); - writel(temp & ~UCR2_RXEN, sport->port.membase + UCR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); - /* disable the `Receiver Ready Interrrupt` */ - temp = readl(sport->port.membase + UCR1); - writel(temp & ~UCR1_RRDYEN, sport->port.membase + UCR1); + if (sport->dma_is_enabled) { + ucr1 &= ~(UCR1_RXDMAEN | UCR1_ATDMAEN); + } 
else { + ucr1 &= ~UCR1_RRDYEN; + } + writel(ucr1, port->membase + UCR1); + + ucr2 &= ~UCR2_RXEN; + writel(ucr2, port->membase + UCR2); } /* @@ -526,7 +557,7 @@ static void imx_dma_tx(struct imx_port *sport) sport->tx_bytes = uart_circ_chars_pending(xmit); - if (xmit->tail < xmit->head) { + if (xmit->tail < xmit->head || xmit->head == 0) { sport->dma_tx_nents = 1; sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes); } else { @@ -581,10 +612,11 @@ static void imx_start_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - temp &= ~UCR2_RXEN; writel(temp, port->membase + UCR2); + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + imx_stop_rx(port); + /* enable transmitter and shifter empty irq */ temp = readl(port->membase + UCR4); temp |= UCR4_TCEN; @@ -811,14 +843,42 @@ static void imx_mctrl_check(struct imx_port *sport) static irqreturn_t imx_int(int irq, void *dev_id) { struct imx_port *sport = dev_id; - unsigned int sts; - unsigned int sts2; + unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4; irqreturn_t ret = IRQ_NONE; - sts = readl(sport->port.membase + USR1); - sts2 = readl(sport->port.membase + USR2); + usr1 = readl(sport->port.membase + USR1); + usr2 = readl(sport->port.membase + USR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); + ucr3 = readl(sport->port.membase + UCR3); + ucr4 = readl(sport->port.membase + UCR4); - if (sts & (USR1_RRDY | USR1_AGTIM)) { + /* + * Even if a condition is true that can trigger an irq only handle it if + * the respective irq source is enabled. This prevents some undesired + * actions, for example if a character that sits in the RX FIFO and that + * should be fetched via DMA is tried to be fetched using PIO. Or the + * receiver is currently off and so reading from URXD0 results in an + * exception. So just mask the (raw) status bits for disabled irqs. 
+ */ + if ((ucr1 & UCR1_RRDYEN) == 0) + usr1 &= ~USR1_RRDY; + if ((ucr2 & UCR2_ATEN) == 0) + usr1 &= ~USR1_AGTIM; + if ((ucr1 & UCR1_TXMPTYEN) == 0) + usr1 &= ~USR1_TRDY; + if ((ucr4 & UCR4_TCEN) == 0) + usr2 &= ~USR2_TXDC; + if ((ucr3 & UCR3_DTRDEN) == 0) + usr1 &= ~USR1_DTRD; + if ((ucr1 & UCR1_RTSDEN) == 0) + usr1 &= ~USR1_RTSD; + if ((ucr3 & UCR3_AWAKEN) == 0) + usr1 &= ~USR1_AWAKE; + if ((ucr4 & UCR4_OREN) == 0) + usr2 &= ~USR2_ORE; + + if (usr1 & (USR1_RRDY | USR1_AGTIM)) { if (sport->dma_is_enabled) imx_dma_rxint(sport); else @@ -826,18 +886,15 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if ((sts & USR1_TRDY && - readl(sport->port.membase + UCR1) & UCR1_TXMPTYEN) || - (sts2 & USR2_TXDC && - readl(sport->port.membase + UCR4) & UCR4_TCEN)) { + if ((usr1 & USR1_TRDY) || (usr2 & USR2_TXDC)) { imx_txint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_DTRD) { + if (usr1 & USR1_DTRD) { unsigned long flags; - if (sts & USR1_DTRD) + if (usr1 & USR1_DTRD) writel(USR1_DTRD, sport->port.membase + USR1); spin_lock_irqsave(&sport->port.lock, flags); @@ -847,17 +904,17 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if (sts & USR1_RTSD) { + if (usr1 & USR1_RTSD) { imx_rtsint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_AWAKE) { + if (usr1 & USR1_AWAKE) { writel(USR1_AWAKE, sport->port.membase + USR1); ret = IRQ_HANDLED; } - if (sts2 & USR2_ORE) { + if (usr2 & USR2_ORE) { sport->port.icount.overrun++; writel(USR2_ORE, sport->port.membase + USR2); ret = IRQ_HANDLED; @@ -1206,7 +1263,7 @@ static void imx_enable_dma(struct imx_port *sport) /* set UCR1 */ temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; + temp |= UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; writel(temp, sport->port.membase + UCR1); temp = readl(sport->port.membase + UCR2); @@ -1224,7 +1281,7 @@ static void imx_disable_dma(struct imx_port *sport) /* clear UCR1 */ temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); + temp &= ~(UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); /* clear UCR2 */ @@ -1289,11 +1346,9 @@ static int imx_startup(struct uart_port *port) writel(USR1_RTSD | USR1_DTRD, sport->port.membase + USR1); writel(USR2_ORE, sport->port.membase + USR2); - if (sport->dma_is_inited && !sport->dma_is_enabled) - imx_enable_dma(sport); - temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RRDYEN | UCR1_UARTEN; + temp &= ~UCR1_RRDYEN; + temp |= UCR1_UARTEN; if (sport->have_rtscts) temp |= UCR1_RTSDEN; @@ -1332,14 +1387,13 @@ static int imx_startup(struct uart_port *port) */ imx_enable_ms(&sport->port); - /* - * Start RX DMA immediately instead of waiting for RX FIFO interrupts. - * In our iMX53 the average delay for the first reception dropped from - * approximately 35000 microseconds to 1000 microseconds. 
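The imx_int rework above reads the enable registers once and clears every raw status bit whose interrupt source is disabled, so a stale RX-ready bit cannot be serviced by PIO while DMA owns the FIFO, and a dead receiver is never read. The masking idiom on its own, with invented bit names:

#include <stdio.h>

#define STAT_RX_READY (1u << 0)	/* invented status bits */
#define STAT_TX_EMPTY (1u << 1)
#define EN_RX         (1u << 0)	/* invented enable bits */
#define EN_TX         (1u << 1)

int main(void)
{
	unsigned int raw = STAT_RX_READY | STAT_TX_EMPTY;
	unsigned int enabled = EN_TX;	/* RX source off: DMA owns the FIFO */
	unsigned int status = raw;

	/* Mask raw status bits whose interrupt source is disabled before
	 * dispatching, so a stale bit cannot trigger the wrong handler. */
	if (!(enabled & EN_RX))
		status &= ~STAT_RX_READY;
	if (!(enabled & EN_TX))
		status &= ~STAT_TX_EMPTY;

	if (status & STAT_RX_READY)
		printf("handle RX\n");
	if (status & STAT_TX_EMPTY)
		printf("handle TX\n");
	return 0;
}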
- */ - if (sport->dma_is_enabled) { - imx_disable_rx_int(sport); + if (sport->dma_is_inited) { + imx_enable_dma(sport); start_rx_dma(sport); + } else { + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_RRDYEN; + writel(temp, sport->port.membase + UCR1); } spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1386,7 +1440,8 @@ static void imx_shutdown(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); + temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN | + UCR1_RXDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1659,7 +1714,7 @@ static int imx_poll_init(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; unsigned long flags; - unsigned long temp; + unsigned long ucr1, ucr2; int retval; retval = clk_prepare_enable(sport->clk_ipg); @@ -1673,16 +1728,29 @@ static int imx_poll_init(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); - temp = readl(sport->port.membase + UCR1); + /* + * Be careful about the order of enabling bits here. First enable the + * receiver (UARTEN + RXEN) and only then the corresponding irqs. + * This prevents that a character that already sits in the RX fifo is + * triggering an irq but the try to fetch it from there results in an + * exception because UARTEN or RXEN is still off. + */ + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); + if (is_imx1_uart(sport)) - temp |= IMX1_UCR1_UARTCLKEN; - temp |= UCR1_UARTEN | UCR1_RRDYEN; - temp &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN); - writel(temp, sport->port.membase + UCR1); + ucr1 |= IMX1_UCR1_UARTCLKEN; - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); + ucr1 |= UCR1_UARTEN; + ucr1 &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN | UCR1_RRDYEN); + + ucr2 |= UCR2_RXEN; + + writel(ucr1, sport->port.membase + UCR1); + writel(ucr2, sport->port.membase + UCR2); + + /* now enable irqs */ + writel(ucr1 | UCR1_RRDYEN, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1742,11 +1810,8 @@ static int imx_rs485_config(struct uart_port *port, /* Make sure Rx is enabled in case Tx is active with Rx disabled */ if (!(rs485conf->flags & SER_RS485_ENABLED) || - rs485conf->flags & SER_RS485_RX_DURING_TX) { - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); - } + rs485conf->flags & SER_RS485_RX_DURING_TX) + imx_start_rx(port); port->rs485 = *rs485conf; diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 401c983ec5f3651bc442b315566fc4dfc63ed627..a10e4aa9e18eaffb59e1df06ec47a4ff7865b867 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -581,7 +581,7 @@ static int mvebu_uart_probe(struct platform_device *pdev) port->membase = devm_ioremap_resource(&pdev->dev, reg); if (IS_ERR(port->membase)) - return -PTR_ERR(port->membase); + return PTR_ERR(port->membase); data = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_uart_data), GFP_KERNEL); diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 333de7d3fe8668566a76dd833dad7440fa9d5e44..06cf474072d6dfe73c9c3c0e0386a09cd184836e 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -841,9 +841,16 @@ static void sci_receive_chars(struct uart_port *port) tty_insert_flip_char(tport, c, 
TTY_NORMAL); } else { for (i = 0; i < count; i++) { - char c = serial_port_in(port, SCxRDR); - - status = serial_port_in(port, SCxSR); + char c; + + if (port->type == PORT_SCIF || + port->type == PORT_HSCIF) { + status = serial_port_in(port, SCxSR); + c = serial_port_in(port, SCxRDR); + } else { + c = serial_port_in(port, SCxRDR); + status = serial_port_in(port, SCxSR); + } if (uart_handle_sysrq_char(port, c)) { count--; i--; continue; diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 344e8c427c7e2c606e2df2d7bde4615bf512951c..9d68f89a2bf8dac451c452445c70f513ec7645c2 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1349,10 +1349,10 @@ static void throttle(struct tty_struct * tty) DBGINFO(("%s throttle\n", info->device_name)); if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1374,10 +1374,10 @@ static void unthrottle(struct tty_struct * tty) else send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2575,8 +2575,8 @@ static void change_params(struct slgt_info *info) info->read_status_mask = IRQ_RXOVER; if (I_INPCK(info->port.tty)) info->read_status_mask |= MASK_PARITY | MASK_FRAMING; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask |= MASK_BREAK; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask |= MASK_BREAK; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING; if (I_IGNBRK(info->port.tty)) { @@ -3207,7 +3207,7 @@ static int tiocmset(struct tty_struct *tty, info->signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; } @@ -3218,7 +3218,7 @@ static int carrier_raised(struct tty_port *port) struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->signals & SerialSignal_DCD) ? 
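The mvebu-uart one-liner above replaces return -PTR_ERR(...) with return PTR_ERR(...): the helper already yields a negative errno, so negating it produced a meaningless positive return. A userspace re-creation of the ERR_PTR/PTR_ERR convention; the kernel's real helpers live in linux/err.h, and the pointer/integer casts here are illustrative only:

#include <stdio.h>
#include <errno.h>

/* Userspace re-creation of the kernel's ERR_PTR/PTR_ERR/IS_ERR helpers. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-4095;
}

int main(void)
{
	void *membase = ERR_PTR(-ENOMEM);	/* a failed ioremap, say */

	if (IS_ERR(membase)) {
		printf("PTR_ERR  = %ld (correct, already negative)\n",
		       PTR_ERR(membase));
		printf("-PTR_ERR = %ld (the old, wrong return value)\n",
		       -PTR_ERR(membase));
	}
	return 0;
}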
1 : 0; } @@ -3233,7 +3233,7 @@ static void dtr_rts(struct tty_port *port, int on) info->signals |= SerialSignal_RTS | SerialSignal_DTR; else info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index 4fed9e7b281f0c5faad059783205b190be8b15d6..3c9e314406b4efe8c91d630b9dadbbef79abf378 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1467,10 +1467,10 @@ static void throttle(struct tty_struct * tty) if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1496,10 +1496,10 @@ static void unthrottle(struct tty_struct * tty) send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2484,7 +2484,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (status & SerialSignal_CTS) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx start..."); - info->port.tty->hw_stopped = 0; + info->port.tty->hw_stopped = 0; tx_start(info); info->pending_bh |= BH_TRANSMIT; return; @@ -2493,7 +2493,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (!(status & SerialSignal_CTS)) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx stop..."); - info->port.tty->hw_stopped = 1; + info->port.tty->hw_stopped = 1; tx_stop(info); } } @@ -2820,8 +2820,8 @@ static void change_params(SLMP_INFO *info) info->read_status_mask2 = OVRN; if (I_INPCK(info->port.tty)) info->read_status_mask2 |= PE | FRME; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask1 |= BRKD; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask1 |= BRKD; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask2 |= PE | FRME; if (I_IGNBRK(info->port.tty)) { @@ -3191,7 +3191,7 @@ static int tiocmget(struct tty_struct *tty) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) | @@ -3229,7 +3229,7 @@ static int tiocmset(struct tty_struct *tty, info->serial_signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; @@ -3241,7 +3241,7 @@ static int carrier_raised(struct tty_port *port) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->serial_signals & SerialSignal_DCD) ? 
1 : 0; @@ -3257,7 +3257,7 @@ static void dtr_rts(struct tty_port *port, int on) info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR; else info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 4e6a3713fae0dc68375afbfc9425ef1e6d9accba..ba64768f5764b3c50affce7d16286ca3918f3041 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -135,17 +135,10 @@ static struct sysrq_key_op sysrq_unraw_op = { static void sysrq_handle_crash(int key) { - char *killer = NULL; - - /* we need to release the RCU read lock here, - * otherwise we get an annoying - * 'BUG: sleeping function called from invalid context' - * complaint from the kernel before the panic. - */ + /* release the RCU read lock before crashing */ rcu_read_unlock(); - panic_on_oops = 1; /* force panic */ - wmb(); - *killer = 1; + + panic("sysrq triggered crash\n"); } static struct sysrq_key_op sysrq_crash_op = { .handler = sysrq_handle_crash, @@ -547,7 +540,6 @@ void __handle_sysrq(int key, bool check_mask) */ orig_log_level = console_loglevel; console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; - pr_info("SysRq : "); op_p = __sysrq_get_key_op(key); if (op_p) { @@ -556,14 +548,15 @@ void __handle_sysrq(int key, bool check_mask) * should not) and is the invoked operation enabled? */ if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { - pr_cont("%s\n", op_p->action_msg); + pr_info("%s\n", op_p->action_msg); console_loglevel = orig_log_level; op_p->handler(key); } else { - pr_cont("This sysrq operation is disabled.\n"); + pr_info("This sysrq operation is disabled.\n"); + console_loglevel = orig_log_level; } } else { - pr_cont("HELP : "); + pr_info("HELP : "); /* Only print the help msg once per handler */ for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) { if (sysrq_key_table[i]) { diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index c93a33701d32acfd5fc36029702dc2139d985b96..dd12c3b86eb461427b700928aa285fcce7d13e66 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -51,10 +51,11 @@ static void tty_port_default_wakeup(struct tty_port *port) } } -static const struct tty_port_client_operations default_client_ops = { +const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .write_wakeup = tty_port_default_wakeup, }; +EXPORT_SYMBOL_GPL(tty_port_default_client_ops); void tty_port_init(struct tty_port *port) { @@ -67,7 +68,7 @@ void tty_port_init(struct tty_port *port) spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; - port->client_ops = &default_client_ops; + port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 7a4c8022c02375828608e5fc7ac9e8b58e6dc80e..8687b17f6cf0245b83b8dd762312b6afadd11cf5 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,8 @@ #include #include +#include + /* Don't take this from : 011-015 on the screen aren't spaces */ #define isspace(c) ((c) == ' ') @@ -41,6 +44,7 @@ static volatile int sel_start = -1; /* cleared by clear_selection */ static int sel_end; static int sel_buffer_lth; static char *sel_buffer; +static DEFINE_MUTEX(sel_lock); /* clear_selection, highlight and highlight_pointer 
can be called from interrupt (via scrollback/front) */ @@ -79,6 +83,11 @@ void clear_selection(void) } } +bool vc_is_sel(struct vc_data *vc) +{ + return vc == sel_cons; +} + /* * User settable table: what characters are to be considered alphabetic? * 128 bits. Locked by the console lock. @@ -153,14 +162,14 @@ static int store_utf8(u16 c, char *p) * The entire selection process is managed under the console_lock. It's * a lot under the lock but its hardly a performance path */ -int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) +static int __set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) { struct vc_data *vc = vc_cons[fg_console].d; int sel_mode, new_sel_start, new_sel_end, spc; char *bp, *obp; int i, ps, pe, multiplier; u16 c; - int mode; + int mode, ret = 0; poke_blanked_console(); @@ -321,7 +330,21 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t } } sel_buffer_lth = bp - sel_buffer; - return 0; + + return ret; +} + +int set_selection(const struct tiocl_selection __user *v, struct tty_struct *tty) +{ + int ret; + + mutex_lock(&sel_lock); + console_lock(); + ret = __set_selection(v, tty); + console_unlock(); + mutex_unlock(&sel_lock); + + return ret; } /* Insert the contents of the selection buffer into the @@ -338,6 +361,7 @@ int paste_selection(struct tty_struct *tty) unsigned int count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); + int ret = 0; console_lock(); poke_blanked_console(); @@ -349,10 +373,17 @@ int paste_selection(struct tty_struct *tty) tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); + mutex_lock(&sel_lock); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) { + ret = -EINTR; + break; + } if (tty_throttled(tty)) { + mutex_unlock(&sel_lock); schedule(); + mutex_lock(&sel_lock); continue; } __set_current_state(TASK_RUNNING); @@ -361,10 +392,11 @@ int paste_selection(struct tty_struct *tty) count); pasted += count; } + mutex_unlock(&sel_lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); - return 0; + return ret; } diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 06761fcedeff4595b19b75204c280ce947188f10..8a4e7879a7a6e73015264538c0bb1ac4066be99a 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -584,8 +584,9 @@ static void hide_softcursor(struct vc_data *vc) static void hide_cursor(struct vc_data *vc) { - if (vc == sel_cons) + if (vc_is_sel(vc)) clear_selection(); + vc->vc_sw->con_cursor(vc, CM_ERASE); hide_softcursor(vc); } @@ -595,7 +596,7 @@ static void set_cursor(struct vc_data *vc) if (!con_is_fg(vc) || console_blanked || vc->vc_mode == KD_GRAPHICS) return; if (vc->vc_deccm) { - if (vc == sel_cons) + if (vc_is_sel(vc)) clear_selection(); add_softcursor(vc); if ((vc->vc_cursor_type & 0x0f) != 1) @@ -750,6 +751,17 @@ static void visual_init(struct vc_data *vc, int num, int init) vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row; } +static void vc_port_destruct(struct tty_port *port) +{ + struct vc_data *vc = container_of(port, struct vc_data, port); + + kfree(vc); +} + +static const struct tty_port_operations vc_port_ops = { + .destruct = vc_port_destruct, +}; + int vc_allocate(unsigned int currcons) /* return 0 on success */ { struct vt_notifier_param param; @@ -775,6 +787,7 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */ 
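A lock-ordering sketch for the selection changes above, restating what the hunks themselves establish: set_selection() nests console_lock inside the new sel_lock, and paste_selection() drops sel_lock before sleeping on a throttled tty, so a blocked paste cannot stall a concurrent selection update:

	/* set_selection(): sel_lock first, console_lock nested inside */
	mutex_lock(&sel_lock);
	console_lock();
	ret = __set_selection(v, tty);
	console_unlock();
	mutex_unlock(&sel_lock);

	/* paste_selection(): never sleep with sel_lock held */
	if (tty_throttled(tty)) {
		mutex_unlock(&sel_lock);
		schedule();
		mutex_lock(&sel_lock);
		continue;
	}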
vc_cons[currcons].d = vc; tty_port_init(&vc->port); + vc->port.ops = &vc_port_ops; INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, 1); @@ -867,13 +880,13 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) return 0; - if (new_screen_size > (4 << 20)) + if (new_screen_size > KMALLOC_MAX_SIZE) return -EINVAL; newscreen = kzalloc(new_screen_size, GFP_USER); if (!newscreen) return -ENOMEM; - if (vc == sel_cons) + if (vc_is_sel(vc)) clear_selection(); old_rows = vc->vc_rows; @@ -2688,9 +2701,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - console_lock(); ret = set_selection((struct tiocl_selection __user *)(p+1), tty); - console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); @@ -2896,6 +2907,7 @@ static int con_install(struct tty_driver *driver, struct tty_struct *tty) tty->driver_data = vc; vc->port.tty = tty; + tty_port_get(&vc->port); if (!tty->winsize.ws_row && !tty->winsize.ws_col) { tty->winsize.ws_row = vc_cons[currcons].d->vc_rows; @@ -2931,6 +2943,13 @@ static void con_shutdown(struct tty_struct *tty) console_unlock(); } +static void con_cleanup(struct tty_struct *tty) +{ + struct vc_data *vc = tty->driver_data; + + tty_port_put(&vc->port); +} + static int default_color = 7; /* white */ static int default_italic_color = 2; // green (ASCII) static int default_underline_color = 3; // cyan (ASCII) @@ -3055,7 +3074,8 @@ static const struct tty_operations con_ops = { .throttle = con_throttle, .unthrottle = con_unthrottle, .resize = vt_resize, - .shutdown = con_shutdown + .shutdown = con_shutdown, + .cleanup = con_cleanup, }; static struct cdev vc0_cdev; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 7b34b0ddbf0e0281ec8ca723fb6469b5ac43d30f..699ad55e3ec60f128d3d0cefaa5de204980e4a32 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -39,11 +39,32 @@ #include #include -char vt_dont_switch; -extern struct tty_driver *console_driver; +bool vt_dont_switch; -#define VT_IS_IN_USE(i) (console_driver->ttys[i] && console_driver->ttys[i]->count) -#define VT_BUSY(i) (VT_IS_IN_USE(i) || i == fg_console || vc_cons[i].d == sel_cons) +static inline bool vt_in_use(unsigned int i) +{ + const struct vc_data *vc = vc_cons[i].d; + + /* + * console_lock must be held to prevent the vc from being deallocated + * while we're checking whether it's in-use. 
+ */ + WARN_CONSOLE_UNLOCKED(); + + return vc && kref_read(&vc->port.kref) > 1; +} + +static inline bool vt_busy(int i) +{ + if (vt_in_use(i)) + return true; + if (i == fg_console) + return true; + if (vc_is_sel(vc_cons[i].d)) + return true; + + return false; +} /* * Console (vt and kd) routines, as defined by USL SVR4 manual, and by @@ -289,16 +310,14 @@ static int vt_disallocate(unsigned int vc_num) int ret = 0; console_lock(); - if (VT_BUSY(vc_num)) + if (vt_busy(vc_num)) ret = -EBUSY; else if (vc_num) vc = vc_deallocate(vc_num); console_unlock(); - if (vc && vc_num >= MIN_NR_CONSOLES) { - tty_port_destroy(&vc->port); - kfree(vc); - } + if (vc && vc_num >= MIN_NR_CONSOLES) + tty_port_put(&vc->port); return ret; } @@ -311,17 +330,15 @@ static void vt_disallocate_all(void) console_lock(); for (i = 1; i < MAX_NR_CONSOLES; i++) - if (!VT_BUSY(i)) + if (!vt_busy(i)) vc[i] = vc_deallocate(i); else vc[i] = NULL; console_unlock(); for (i = 1; i < MAX_NR_CONSOLES; i++) { - if (vc[i] && i >= MIN_NR_CONSOLES) { - tty_port_destroy(&vc[i]->port); - kfree(vc[i]); - } + if (vc[i] && i >= MIN_NR_CONSOLES) + tty_port_put(&vc[i]->port); } } @@ -335,22 +352,13 @@ int vt_ioctl(struct tty_struct *tty, { struct vc_data *vc = tty->driver_data; struct console_font_op op; /* used in multiple places here */ - unsigned int console; + unsigned int console = vc->vc_num; unsigned char ucval; unsigned int uival; void __user *up = (void __user *)arg; int i, perm; int ret = 0; - console = vc->vc_num; - - - if (!vc_cons_allocated(console)) { /* impossible? */ - ret = -ENOIOCTLCMD; - goto out; - } - - /* * To have permissions to do most of the vt ioctls, we either have * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG. @@ -641,15 +649,16 @@ int vt_ioctl(struct tty_struct *tty, struct vt_stat __user *vtstat = up; unsigned short state, mask; - /* Review: FIXME: Console lock ? */ if (put_user(fg_console + 1, &vtstat->v_active)) ret = -EFAULT; else { state = 1; /* /dev/tty0 is always open */ + console_lock(); /* required by vt_in_use() */ for (i = 0, mask = 2; i < MAX_NR_CONSOLES && mask; ++i, mask <<= 1) - if (VT_IS_IN_USE(i)) + if (vt_in_use(i)) state |= mask; + console_unlock(); ret = put_user(state, &vtstat->v_state); } break; @@ -659,10 +668,11 @@ int vt_ioctl(struct tty_struct *tty, * Returns the first available (non-opened) console. */ case VT_OPENQRY: - /* FIXME: locking ? - but then this is a stupid API */ + console_lock(); /* required by vt_in_use() */ for (i = 0; i < MAX_NR_CONSOLES; ++i) - if (! VT_IS_IN_USE(i)) + if (!vt_in_use(i)) break; + console_unlock(); uival = i < MAX_NR_CONSOLES ? (i+1) : -1; goto setint; @@ -847,58 +857,49 @@ int vt_ioctl(struct tty_struct *tty, case VT_RESIZEX: { - struct vt_consize __user *vtconsize = up; - ushort ll,cc,vlin,clin,vcol,ccol; + struct vt_consize v; if (!perm) return -EPERM; - if (!access_ok(VERIFY_READ, vtconsize, - sizeof(struct vt_consize))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&v, up, sizeof(struct vt_consize))) + return -EFAULT; /* FIXME: Should check the copies properly */ - __get_user(ll, &vtconsize->v_rows); - __get_user(cc, &vtconsize->v_cols); - __get_user(vlin, &vtconsize->v_vlin); - __get_user(clin, &vtconsize->v_clin); - __get_user(vcol, &vtconsize->v_vcol); - __get_user(ccol, &vtconsize->v_ccol); - vlin = vlin ? 
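/*
 * The vt_in_use() helper above replaces the old VT_IS_IN_USE peek at
 * console_driver->ttys with tty_port reference counting. The invariant
 * it relies on is built by the other hunks in this series:
 *
 *	tty_port_init(&vc->port);	kref == 1, base reference
 *	tty_port_get(&vc->port);	con_install(): kref == 2 while bound
 *	tty_port_put(&vc->port);	con_cleanup(): back to 1 on last close
 *
 * so, under console_lock, kref_read(&vc->port.kref) > 1 exactly when a
 * tty still holds the console open.
 */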
vlin : vc->vc_scan_lines; - if (clin) { - if (ll) { - if (ll != vlin/clin) { - /* Parameters don't add up */ - ret = -EINVAL; - break; - } - } else - ll = vlin/clin; + if (!v.v_vlin) + v.v_vlin = vc->vc_scan_lines; + if (v.v_clin) { + int rows = v.v_vlin/v.v_clin; + if (v.v_rows != rows) { + if (v.v_rows) /* Parameters don't add up */ + return -EINVAL; + v.v_rows = rows; + } } - if (vcol && ccol) { - if (cc) { - if (cc != vcol/ccol) { - ret = -EINVAL; - break; - } - } else - cc = vcol/ccol; + if (v.v_vcol && v.v_ccol) { + int cols = v.v_vcol/v.v_ccol; + if (v.v_cols != cols) { + if (v.v_cols) + return -EINVAL; + v.v_cols = cols; + } } - if (clin > 32) { - ret = -EINVAL; - break; - } - + if (v.v_clin > 32) + return -EINVAL; + for (i = 0; i < MAX_NR_CONSOLES; i++) { + struct vc_data *vcp; + if (!vc_cons[i].d) continue; console_lock(); - if (vlin) - vc_cons[i].d->vc_scan_lines = vlin; - if (clin) - vc_cons[i].d->vc_font.height = clin; - vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, cc, ll); + vcp = vc_cons[i].d; + if (vcp) { + if (v.v_vlin) + vcp->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vcp->vc_font.height = v.v_clin; + vcp->vc_resize_user = 1; + vc_resize(vcp, v.v_cols, v.v_rows); + } console_unlock(); } break; @@ -1020,12 +1021,12 @@ int vt_ioctl(struct tty_struct *tty, case VT_LOCKSWITCH: if (!capable(CAP_SYS_TTY_CONFIG)) return -EPERM; - vt_dont_switch = 1; + vt_dont_switch = true; break; case VT_UNLOCKSWITCH: if (!capable(CAP_SYS_TTY_CONFIG)) return -EPERM; - vt_dont_switch = 0; + vt_dont_switch = false; break; case VT_GETHIFONTMASK: ret = put_user(vc->vc_hi_font_mask, @@ -1189,18 +1190,10 @@ long vt_compat_ioctl(struct tty_struct *tty, { struct vc_data *vc = tty->driver_data; struct console_font_op op; /* used in multiple places here */ - unsigned int console; void __user *up = (void __user *)arg; int perm; int ret = 0; - console = vc->vc_num; - - if (!vc_cons_allocated(console)) { /* impossible? */ - ret = -ENOIOCTLCMD; - goto out; - } - /* * To have permissions to do most of the vt ioctls, we either have * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG. 
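The rewritten VT_RESIZEX validation above derives a missing v_rows (or v_cols) from the geometry instead of open-coding nested checks. A worked example of the arithmetic, with made-up numbers (a 400-scan-line screen and a 16-scan-line font):

	struct vt_consize v = { .v_rows = 0, .v_vlin = 400, .v_clin = 16 };

	if (v.v_clin) {
		int rows = v.v_vlin / v.v_clin;	/* 400 / 16 = 25 */

		if (v.v_rows != rows) {
			if (v.v_rows)	/* nonzero but inconsistent: reject */
				return -EINVAL;
			v.v_rows = rows;	/* zero means "derive it" */
		}
	}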
@@ -1260,7 +1253,7 @@ long vt_compat_ioctl(struct tty_struct *tty,
 		arg = (unsigned long)compat_ptr(arg);
 		goto fallback;
 	}
-out:
+
 	return ret;
 
 fallback:
diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index e1134a4d97f3fd0005e30dc29e9acf9da77e0e46..a00b4aee6c799cd12b8273db0101990c6437033a 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -135,11 +135,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
 	if (irq_on) {
 		if (test_and_clear_bit(0, &priv->flags))
 			enable_irq(dev_info->irq);
+		spin_unlock_irqrestore(&priv->lock, flags);
 	} else {
-		if (!test_and_set_bit(0, &priv->flags))
+		if (!test_and_set_bit(0, &priv->flags)) {
+			spin_unlock_irqrestore(&priv->lock, flags);
 			disable_irq(dev_info->irq);
+		}
 	}
-	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
 }
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 0453f0eb11781c29f6e4f88b05d51d840ba6ae72..38709bee4c202f5216a2fd22b248a34e75f96a19 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -424,9 +424,12 @@ static void acm_ctrl_irq(struct urb *urb)
 exit:
 	retval = usb_submit_urb(urb, GFP_ATOMIC);
-	if (retval && retval != -EPERM)
+	if (retval && retval != -EPERM && retval != -ENODEV)
 		dev_err(&acm->control->dev,
 			"%s - usb_submit_urb failed: %d\n", __func__, retval);
+	else
+		dev_vdbg(&acm->control->dev,
+			"control resubmission terminated %d\n", retval);
 }
 
 static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
@@ -442,6 +445,8 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
 		dev_err(&acm->data->dev,
 			"urb %d failed submission with %d\n",
 			index, res);
+	} else {
+		dev_vdbg(&acm->data->dev, "intended failure %d\n", res);
 	}
 	set_bit(index, &acm->read_urbs_free);
 	return res;
@@ -484,6 +489,7 @@ static void acm_read_bulk_callback(struct urb *urb)
 	int status = urb->status;
 	bool stopped = false;
 	bool stalled = false;
+	bool cooldown = false;
 
 	dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
 		rb->index, urb->actual_length, status);
@@ -510,6 +516,14 @@ static void acm_read_bulk_callback(struct urb *urb)
 			__func__, status);
 		stopped = true;
 		break;
+	case -EOVERFLOW:
+	case -EPROTO:
+		dev_dbg(&acm->data->dev,
+			"%s - cooling babbling device\n", __func__);
+		usb_mark_last_busy(acm->dev);
+		set_bit(rb->index, &acm->urbs_in_error_delay);
+		set_bit(ACM_ERROR_DELAY, &acm->flags);	/* resubmitted by acm_softint() */
+		cooldown = true;
+		break;
 	default:
 		dev_dbg(&acm->data->dev,
 			"%s - nonzero urb status received: %d\n",
@@ -531,9 +545,11 @@ static void acm_read_bulk_callback(struct urb *urb)
 	 */
 	smp_mb__after_atomic();
 
-	if (stopped || stalled) {
+	if (stopped || stalled || cooldown) {
 		if (stalled)
 			schedule_work(&acm->work);
+		else if (cooldown)
+			schedule_delayed_work(&acm->dwork, HZ / 2);
 		return;
 	}
@@ -575,14 +591,20 @@ static void acm_softint(struct work_struct *work)
 	struct acm *acm = container_of(work, struct acm, work);
 
 	if (test_bit(EVENT_RX_STALL, &acm->flags)) {
-		if (!(usb_autopm_get_interface(acm->data))) {
+		smp_mb(); /* against acm_suspend() */
+		if (!acm->susp_count) {
 			for (i = 0; i < acm->rx_buflimit; i++)
 				usb_kill_urb(acm->read_urbs[i]);
 			usb_clear_halt(acm->dev, acm->in);
 			acm_submit_read_urbs(acm, GFP_KERNEL);
-			usb_autopm_put_interface(acm->data);
+			clear_bit(EVENT_RX_STALL, &acm->flags);
 		}
-		clear_bit(EVENT_RX_STALL, &acm->flags);
+	}
+
+	if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) {
+		for (i = 0; i < ACM_NR; i++)
+			if (test_and_clear_bit(i, &acm->urbs_in_error_delay))
+				acm_submit_read_urb(acm, i, GFP_NOIO);
 	}
 
 	if
(test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) @@ -926,10 +948,10 @@ static int get_serial_info(struct acm *acm, struct serial_struct __user *info) memset(&tmp, 0, sizeof(tmp)); tmp.xmit_fifo_size = acm->writesize; tmp.baud_base = le32_to_cpu(acm->line.dwDTERate); - tmp.close_delay = acm->port.close_delay / 10; + tmp.close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; tmp.closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : - acm->port.closing_wait / 10; + jiffies_to_msecs(acm->port.closing_wait) / 10; if (copy_to_user(info, &tmp, sizeof(tmp))) return -EFAULT; @@ -942,20 +964,28 @@ static int set_serial_info(struct acm *acm, { struct serial_struct new_serial; unsigned int closing_wait, close_delay; + unsigned int old_closing_wait, old_close_delay; int retval = 0; if (copy_from_user(&new_serial, newinfo, sizeof(new_serial))) return -EFAULT; - close_delay = new_serial.close_delay * 10; + close_delay = msecs_to_jiffies(new_serial.close_delay * 10); closing_wait = new_serial.closing_wait == ASYNC_CLOSING_WAIT_NONE ? - ASYNC_CLOSING_WAIT_NONE : new_serial.closing_wait * 10; + ASYNC_CLOSING_WAIT_NONE : + msecs_to_jiffies(new_serial.closing_wait * 10); + + /* we must redo the rounding here, so that the values match */ + old_close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; + old_closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? + ASYNC_CLOSING_WAIT_NONE : + jiffies_to_msecs(acm->port.closing_wait) / 10; mutex_lock(&acm->port.mutex); if (!capable(CAP_SYS_ADMIN)) { - if ((close_delay != acm->port.close_delay) || - (closing_wait != acm->port.closing_wait)) + if ((new_serial.close_delay != old_close_delay) || + (new_serial.closing_wait != old_closing_wait)) retval = -EPERM; else retval = -EOPNOTSUPP; @@ -1366,6 +1396,7 @@ static int acm_probe(struct usb_interface *intf, acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; INIT_WORK(&acm->work, acm_softint); + INIT_DELAYED_WORK(&acm->dwork, acm_softint); init_waitqueue_head(&acm->wioctl); spin_lock_init(&acm->write_lock); spin_lock_init(&acm->read_lock); @@ -1579,6 +1610,7 @@ static void acm_disconnect(struct usb_interface *intf) acm_kill_urbs(acm); cancel_work_sync(&acm->work); + cancel_delayed_work_sync(&acm->dwork); tty_unregister_device(acm_tty_driver, acm->minor); @@ -1621,6 +1653,8 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) acm_kill_urbs(acm); cancel_work_sync(&acm->work); + cancel_delayed_work_sync(&acm->dwork); + acm->urbs_in_error_delay = 0; return 0; } diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 515aad0847ee84b408e7fceb503081f5d97fc78b..30380d28a504b8715417f6f720103ea480ab562a 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -108,8 +108,11 @@ struct acm { unsigned long flags; # define EVENT_TTY_WAKEUP 0 # define EVENT_RX_STALL 1 +# define ACM_ERROR_DELAY 3 + unsigned long urbs_in_error_delay; /* these need to be restarted after a delay */ struct usb_cdc_line_coding line; /* bits, stop, parity */ - struct work_struct work; /* work queue entry for line discipline waking up */ + struct work_struct work; /* work queue entry for various purposes*/ + struct delayed_work dwork; /* for cool downs needed in error recovery */ unsigned int ctrlin; /* input control lines (DCD, DSR, RI, break, overruns) */ unsigned int ctrlout; /* output control lines (DTR, RTS) */ struct async_icount iocount; /* counters for control line changes */ diff --git a/drivers/usb/core/hub.c 
b/drivers/usb/core/hub.c index 7f2ff4fa7e4ef13b64c2988ec434671ad90b98cd..8e03def58017e12e9f2136b51d6d497f4d8ad386 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -37,7 +37,9 @@ #include "otg_whitelist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 +#define USB_VENDOR_SMSC 0x0424 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 +#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -968,13 +970,17 @@ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; + int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); - usb_autopm_get_interface(intf); + ret = usb_autopm_get_interface(intf); + if (ret < 0) + return ret; + set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); @@ -1697,6 +1703,10 @@ static void hub_disconnect(struct usb_interface *intf) kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); + + if (hub->quirk_disable_autosuspend) + usb_autopm_put_interface(intf); + kref_put(&hub->kref, hub_release); } @@ -1827,6 +1837,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; + if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { + hub->quirk_disable_autosuspend = 1; + usb_autopm_get_interface_no_resume(intf); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) return 0; @@ -3010,6 +3025,15 @@ static int check_port_resume_type(struct usb_device *udev, if (portchange & USB_PORT_STAT_C_ENABLE) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_ENABLE); + + /* + * Whatever made this reset-resume necessary may have + * turned on the port1 bit in hub->change_bits. But after + * a successful reset-resume we want the bit to be clear; + * if it was on it would indicate that something happened + * following the reset-resume. 
+ */ + clear_bit(port1, hub->change_bits); } return status; @@ -5325,6 +5349,10 @@ static void hub_event(struct work_struct *work) } static const struct usb_device_id hub_id_table[] = { + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, + .idVendor = USB_VENDOR_SMSC, + .bInterfaceClass = USB_CLASS_HUB, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 34c1a7e22aae020a2f48bce219a342a89d835650..657bacfbe3a7e5c27b7691e11609b0dc2bac3e84 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -69,6 +69,7 @@ struct usb_hub { unsigned quiescing:1; unsigned disconnected:1; unsigned in_reset:1; + unsigned quirk_disable_autosuspend:1; unsigned quirk_check_port_auto_suspend:1; diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index e70578e1115614d229250f9bdbb154a5ab7dd565..00e80cfe614cefc4945c7947f4e37627891f29da 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -586,12 +586,13 @@ void usb_sg_cancel(struct usb_sg_request *io) int i, retval; spin_lock_irqsave(&io->lock, flags); - if (io->status) { + if (io->status || io->count == 0) { spin_unlock_irqrestore(&io->lock, flags); return; } /* shut everything down */ io->status = -ECONNRESET; + io->count++; /* Keep the request alive until we're done */ spin_unlock_irqrestore(&io->lock, flags); for (i = io->entries - 1; i >= 0; --i) { @@ -605,6 +606,12 @@ void usb_sg_cancel(struct usb_sg_request *io) dev_warn(&io->dev->dev, "%s, unlink --> %d\n", __func__, retval); } + + spin_lock_irqsave(&io->lock, flags); + io->count--; + if (!io->count) + complete(&io->complete); + spin_unlock_irqrestore(&io->lock, flags); } EXPORT_SYMBOL_GPL(usb_sg_cancel); diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 460c855be0d020bd21d3cc024d0a33d705b0d59d..53c1f6e604b152923bf323317d270b05da6c20dd 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -179,7 +179,10 @@ static int usb_port_runtime_resume(struct device *dev) if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { @@ -232,7 +235,10 @@ static int usb_port_runtime_suspend(struct device *dev) if (usb_port_block_power_off) return -EBUSY; - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 047bb6ebf48580f9f1f54ddd15c421d2cdcf39dc..7d3130b0209ee2ed0d1161657a2f233f12b2b1de 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -86,6 +86,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Logitech PTZ Pro Camera */ { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Screen Share */ + { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -226,6 +229,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 
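/*
 * The usb_sg_cancel() change above turns io->count into a keep-alive:
 * the canceller bumps the in-flight count before dropping the lock to
 * unlink URBs, so the request cannot complete and be torn down in the
 * middle of the cancel, and the final decrement fires the completion
 * if the canceller was the last user. The shape of the pattern:
 *
 *	spin_lock_irqsave(&io->lock, flags);
 *	if (io->status || io->count == 0) {	// failing or already done
 *		spin_unlock_irqrestore(&io->lock, flags);
 *		return;
 *	}
 *	io->status = -ECONNRESET;
 *	io->count++;				// hold the request alive
 *	spin_unlock_irqrestore(&io->lock, flags);
 *
 *	// ... unlink the URBs with the lock dropped ...
 *
 *	spin_lock_irqsave(&io->lock, flags);
 *	if (!--io->count)			// last user: wake the waiter
 *		complete(&io->complete);
 *	spin_unlock_irqrestore(&io->lock, flags);
 */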
0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Realtek hub in Dell WD19 (Type-C) */ + { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, + + /* Generic RTL8153 based ethernet adapters */ + { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, @@ -263,6 +272,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair K70 LUX */ { USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair K70 RGB RAPDIFIRE */ + { USB_DEVICE(0x1b1c, 0x1b38), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, + /* MIDI keyboard WORLDE MINI */ { USB_DEVICE(0x1c75, 0x0204), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, @@ -297,6 +310,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + /* novation SoundControl XL */ + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + { } /* terminating entry must be last */ }; diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 4af9a1c652edb56b3bcf87c67d6568561f496d16..aeb6f7c84ea0ae83a83c5863e37ecf6eb97fbb78 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3933,11 +3933,12 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, * a unique tx-fifo even if it is non-periodic. */ if (dir_in && hsotg->dedicated_fifos) { + unsigned fifo_count = dwc2_hsotg_tx_fifo_count(hsotg); u32 fifo_index = 0; u32 fifo_size = UINT_MAX; size = hs_ep->ep.maxpacket * hs_ep->mc; - for (i = 1; i < hsotg->num_of_eps; ++i) { + for (i = 1; i <= fifo_count; ++i) { if (hsotg->fifo_map & (1 << i)) continue; val = dwc2_readl(hsotg->regs + DPTXFSIZN(i)); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index ecc696189967a8e315c4c124e757361fde8680a2..f5a39d85382f62a0f1835dd28aaa150d51dcd457 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -944,6 +944,9 @@ int dwc3_core_init(struct dwc3 *dwc) if (dwc->dis_tx_ipgap_linecheck_quirk) reg |= DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS; + if (dwc->parkmode_disable_ss_quirk) + reg |= DWC3_GUCTL1_PARKMODE_DISABLE_SS; + /* * STAR: 9001415732: Host failure when Park mode is enabled: * Disable parkmode for Gen1 controllers to fix the stall @@ -1255,6 +1258,8 @@ static void dwc3_get_properties(struct dwc3 *dwc) "snps,dis-del-phy-power-chg-quirk"); dwc->dis_tx_ipgap_linecheck_quirk = device_property_read_bool(dev, "snps,dis-tx-ipgap-linecheck-quirk"); + dwc->parkmode_disable_ss_quirk = device_property_read_bool(dev, + "snps,parkmode-disable-ss-quirk"); dwc->tx_de_emphasis_quirk = device_property_read_bool(dev, "snps,tx_de_emphasis_quirk"); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index ae63ee62491fa34b7cbdd6df50490e5c9c3565df..20e4dd6d9c8ba6b3d05952fc8c899aecae03484f 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -220,6 +220,7 @@ #define DWC3_GCTL_DSBLCLKGTNG BIT(0) /* Global User Control 1 Register */ +#define DWC3_GUCTL1_PARKMODE_DISABLE_SS BIT(17) #define DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS BIT(28) #define DWC3_GUCTL1_DEV_L1_EXIT_BY_HW BIT(24) #define DWC3_GUCTL1_IP_GAP_ADD_ON(n) (n << 21) @@ -969,6 +970,8 @@ struct dwc3_scratchpad_array { * change quirk. * @dis_tx_ipgap_linecheck_quirk: set if we disable u2mac linestate * check during HS transmit. 
+ * @parkmode_disable_ss_quirk: set if we need to disable all SuperSpeed + * instances in park mode. * @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk * @tx_de_emphasis: Tx de-emphasis value * 0 - -6dB de-emphasis @@ -1163,6 +1166,7 @@ struct dwc3 { unsigned dis_u2_freeclk_exists_quirk:1; unsigned dis_del_phy_power_chg_quirk:1; unsigned dis_tx_ipgap_linecheck_quirk:1; + unsigned parkmode_disable_ss_quirk:1; unsigned tx_de_emphasis_quirk:1; unsigned ssp_u3_u0_quirk:1; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c6f0ce788048552b1046fe34c83656f494cb7e27..c8e8f7dd29a273e742eff5d3308a87fdef441f47 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1939,8 +1939,10 @@ static int dwc3_gadget_wakeup_int(struct dwc3 *dwc) link_state = dwc3_get_link_state(dwc); switch (link_state) { + case DWC3_LINK_STATE_RESET: case DWC3_LINK_STATE_RX_DET: /* in HS, means Early Suspend */ case DWC3_LINK_STATE_U3: /* in HS, means SUSPEND */ + case DWC3_LINK_STATE_RESUME: break; case DWC3_LINK_STATE_U1: if (dwc->gadget.speed < USB_SPEED_SUPER) { diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index 12fe70beae6991026f6ff2c19d0c92b4d5d5bc2a..21244c556b8102eb1bcb19a61a8372f2e56e10b9 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -738,19 +738,19 @@ static void xdbc_handle_tx_event(struct xdbc_trb *evt_trb) case COMP_USB_TRANSACTION_ERROR: case COMP_STALL_ERROR: default: - if (ep_id == XDBC_EPID_OUT) + if (ep_id == XDBC_EPID_OUT || ep_id == XDBC_EPID_OUT_INTEL) xdbc.flags |= XDBC_FLAGS_OUT_STALL; - if (ep_id == XDBC_EPID_IN) + if (ep_id == XDBC_EPID_IN || ep_id == XDBC_EPID_IN_INTEL) xdbc.flags |= XDBC_FLAGS_IN_STALL; xdbc_trace("endpoint %d stalled\n", ep_id); break; } - if (ep_id == XDBC_EPID_IN) { + if (ep_id == XDBC_EPID_IN || ep_id == XDBC_EPID_IN_INTEL) { xdbc.flags &= ~XDBC_FLAGS_IN_PROCESS; xdbc_bulk_transfer(NULL, XDBC_MAX_PACKET, true); - } else if (ep_id == XDBC_EPID_OUT) { + } else if (ep_id == XDBC_EPID_OUT || ep_id == XDBC_EPID_OUT_INTEL) { xdbc.flags &= ~XDBC_FLAGS_OUT_PROCESS; } else { xdbc_trace("invalid endpoint id %d\n", ep_id); diff --git a/drivers/usb/early/xhci-dbc.h b/drivers/usb/early/xhci-dbc.h index a516cab0bf4a5401d90f9d4c2c941f650eb0a375..6c9200d913daf060466adcbca7ef7e0a2d132458 100644 --- a/drivers/usb/early/xhci-dbc.h +++ b/drivers/usb/early/xhci-dbc.h @@ -123,8 +123,22 @@ struct xdbc_ring { u32 cycle_state; }; -#define XDBC_EPID_OUT 2 -#define XDBC_EPID_IN 3 +/* + * These are the "Endpoint ID" (also known as "Context Index") values for the + * OUT Transfer Ring and the IN Transfer Ring of a Debug Capability Context data + * structure. + * According to the "eXtensible Host Controller Interface for Universal Serial + * Bus (xHCI)" specification, section "7.6.3.2 Endpoint Contexts and Transfer + * Rings", these should be 0 and 1, and those are the values AMD machines give + * you; but Intel machines seem to use the formula from section "4.5.1 Device + * Context Index", which is supposed to be used for the Device Context only. + * Luckily the values from Intel don't overlap with those from AMD, so we can + * just test for both. 
+ */ +#define XDBC_EPID_OUT 0 +#define XDBC_EPID_IN 1 +#define XDBC_EPID_OUT_INTEL 2 +#define XDBC_EPID_IN_INTEL 3 struct xdbc_state { u16 vendor; diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 904cfe925ad92ba02dc1174aa24da82a4e56d3f1..21fbcec38aefcf21578594f454fc24166bb445ba 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -671,34 +671,6 @@ config USB_CONFIGFS_F_MDM_DATA help USB QTI modem data function driver. -choice - tristate "USB Gadget precomposed configurations" - default USB_ETH - optional - help - A Linux "Gadget Driver" talks to the USB Peripheral Controller - driver through the abstract "gadget" API. Some other operating - systems call these "client" drivers, of which "class drivers" - are a subset (implementing a USB device class specification). - A gadget driver implements one or more USB functions using - the peripheral hardware. - - Gadget drivers are hardware-neutral, or "platform independent", - except that they sometimes must understand quirks or limitations - of the particular controllers they work with. For example, when - a controller doesn't support alternate configurations or provide - enough of the right types of endpoints, the gadget driver might - not be able work with that controller, or might need to implement - a less common variant of a device class protocol. - - The available choices each represent a single precomposed USB - gadget configuration. In the device model, each option contains - both the device instantiation as a child for a USB gadget - controller, and the relevant drivers for each function declared - by the device. - source "drivers/usb/gadget/legacy/Kconfig" -endchoice - endif # USB_GADGET diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index ad9f3f423e614dba4d9a9f478bceab25bbfebca5..f7a57942f2df1179314fcc61af6f1cacff5a0229 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -569,14 +569,14 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, val = CONFIG_USB_GADGET_VBUS_DRAW; if (!val) return 0; - switch (speed) { - case USB_SPEED_SUPER: - case USB_SPEED_SUPER_PLUS: - return (u8)(val / 8); - default: - /* only SuperSpeed and faster support > 500mA */ - return DIV_ROUND_UP(min(val, 500U), 2); - } + if (speed < USB_SPEED_SUPER) + return min(val, 500U) / 2; + else + /* + * USB 3.x supports up to 900mA, but since 900 isn't divisible + * by 8 the integral division will effectively cap to 896mA. + */ + return min(val, 900U) / 8; } static int config_buf(struct usb_configuration *config, @@ -978,8 +978,14 @@ static int set_config(struct usb_composite_dev *cdev, power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; if (gadget->speed < USB_SPEED_SUPER) power = min(power, 500U); - + else + power = min(power, 900U); done: + if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) + usb_gadget_set_selfpowered(gadget); + else + usb_gadget_clear_selfpowered(gadget); + usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) result = USB_GADGET_DELAYED_STATUS; @@ -2417,6 +2423,7 @@ void composite_suspend(struct usb_gadget *gadget) cdev->suspended = 1; spin_unlock_irqrestore(&cdev->lock, flags); + usb_gadget_set_selfpowered(gadget); usb_gadget_vbus_draw(gadget, 2); } @@ -2466,10 +2473,16 @@ void composite_resume(struct usb_gadget *gadget) f->resume(f); } - maxpower = cdev->config->MaxPower; - maxpower = maxpower ? maxpower : CONFIG_USB_GADGET_VBUS_DRAW; + maxpower = cdev->config->MaxPower ? 
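/*
 * Worked numbers for the encode_bMaxPower() rewrite above, using the
 * descriptor units from the USB specifications (2 mA per unit below
 * SuperSpeed, 8 mA per unit at SuperSpeed and above):
 *
 *	below SuperSpeed:  min(val, 500U) / 2  ->  500 / 2 = 250 units
 *	                   250 * 2 mA = 500 mA
 *	SuperSpeed+:       min(val, 900U) / 8  ->  900 / 8 = 112 units
 *	                   112 * 8 mA = 896 mA
 *
 * which is why the in-code comment says the integral division
 * effectively caps the advertised draw at 896 mA rather than 900 mA.
 */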
+			cdev->config->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW;
 	if (gadget->speed < USB_SPEED_SUPER)
 		maxpower = min(maxpower, 500U);
+	else
+		maxpower = min(maxpower, 900U);
+
+	if (maxpower > USB_SELF_POWER_VBUS_MAX_DRAW)
+		usb_gadget_clear_selfpowered(gadget);
+
 	usb_gadget_vbus_draw(gadget, maxpower);
 }
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 31780ddf09c025d567608b0af802968d2321d1c8..c46874d6f3b0ece628717cb6f1ced6a3273fe222 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1138,6 +1138,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 		ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC);
 		if (unlikely(ret)) {
+			io_data->req = NULL;
 			usb_ep_free_request(ep->ep, req);
 			goto error_lock;
 		}
@@ -1191,6 +1192,7 @@ static int ffs_aio_cancel(struct kiocb *kiocb)
 	struct ffs_io_data *io_data = kiocb->private;
 	struct ffs_epfile *epfile = kiocb->ki_filp->private_data;
 	struct ffs_data *ffs = epfile->ffs;
+	unsigned long flags;
 	int value;
 
 	ENTER();
@@ -1198,14 +1200,14 @@ static int ffs_aio_cancel(struct kiocb *kiocb)
 	ffs_log("enter:state %d setup_state %d flag %lu", ffs->state,
 		ffs->setup_state, ffs->flags);
 
-	spin_lock_irq(&epfile->ffs->eps_lock);
+	spin_lock_irqsave(&epfile->ffs->eps_lock, flags);
 
 	if (likely(io_data && io_data->ep && io_data->req))
 		value = usb_ep_dequeue(io_data->ep, io_data->req);
 	else
 		value = -EINVAL;
 
-	spin_unlock_irq(&epfile->ffs->eps_lock);
+	spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags);
 
 	ffs_log("exit: value %d", value);
diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index d8a50c7da202dcf9ed59d5545762bc5735b37350..eb04868e9e080256e51dc9508ab9a612a5333d0e 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -715,8 +715,10 @@ static int gs_start_io(struct gs_port *port)
 	port->n_read = 0;
 	started = gs_start_rx(port);
 
-	/* unblock any pending writes into our circular buffer */
 	if (started) {
+		gs_start_tx(port);
+		/* Unblock any pending writes into our circular buffer, in
+		 * case we didn't in gs_start_tx() */
 		tty_wakeup(port->port.tty);
 	} else {
 		gs_free_requests(ep, head, &port->read_allocated);
diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig
index 82445dd40a96b7c64755c3e79d3c69decb642553..56590504057c3f03c20d0c51fbc836d4c9edf46d 100644
--- a/drivers/usb/gadget/legacy/Kconfig
+++ b/drivers/usb/gadget/legacy/Kconfig
@@ -13,6 +13,32 @@
 # both kinds of controller can also support "USB On-the-Go" (CONFIG_USB_OTG).
 #
 
+choice
+	tristate "USB Gadget precomposed configurations"
+	default USB_ETH
+	optional
+	help
+	  A Linux "Gadget Driver" talks to the USB Peripheral Controller
+	  driver through the abstract "gadget" API. Some other operating
+	  systems call these "client" drivers, of which "class drivers"
+	  are a subset (implementing a USB device class specification).
+	  A gadget driver implements one or more USB functions using
+	  the peripheral hardware.
+
+	  Gadget drivers are hardware-neutral, or "platform independent",
+	  except that they sometimes must understand quirks or limitations
+	  of the particular controllers they work with. For example, when
+	  a controller doesn't support alternate configurations or provide
+	  enough of the right types of endpoints, the gadget driver might
+	  not be able to work with that controller, or might need to
+	  implement a less common variant of a device class protocol.
+
+	  The available choices each represent a single precomposed USB
+	  gadget configuration. In the device model, each option contains
+	  both the device instantiation as a child for a USB gadget
+	  controller, and the relevant drivers for each function declared
+	  by the device.
+
 config USB_ZERO
 	tristate "Gadget Zero (DEVELOPMENT)"
 	select USB_LIBCOMPOSITE
@@ -500,3 +526,16 @@ config USB_G_WEBCAM
 	  Say "y" to link the driver statically, or "m" to build a
 	  dynamically linked module called "g_webcam".
 
+
+config USB_RAW_GADGET
+	tristate "USB Raw Gadget"
+	help
+	  USB Raw Gadget is a kernel module that provides a userspace
+	  interface for the USB Gadget subsystem. Essentially it allows
+	  emulating USB devices from userspace. See
+	  Documentation/usb/raw-gadget.rst for details.
+
+	  Say "y" to link the driver statically, or "m" to build a
+	  dynamically linked module called "raw_gadget".
+
+endchoice
diff --git a/drivers/usb/gadget/legacy/Makefile b/drivers/usb/gadget/legacy/Makefile
index 216d5cf177caee76e2ab1682c2ef5228af236845..2220792c3f5735dce1bd362bf4a5896971a42a2a 100644
--- a/drivers/usb/gadget/legacy/Makefile
+++ b/drivers/usb/gadget/legacy/Makefile
@@ -45,3 +45,4 @@ obj-$(CONFIG_USB_G_NCM)	+= g_ncm.o
 obj-$(CONFIG_USB_G_ACM_MS)	+= g_acm_ms.o
 obj-$(CONFIG_USB_GADGET_TARGET)	+= tcm_usb_gadget.o
 obj-$(CONFIG_USB_G_QTI)	+= g_qti_gadget.o
+obj-$(CONFIG_USB_RAW_GADGET)	+= raw_gadget.o
diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c
new file mode 100644
index 0000000000000000000000000000000000000000..76406343fbe577d2d8e9fafe03c72a27dba8a051
--- /dev/null
+++ b/drivers/usb/gadget/legacy/raw_gadget.c
@@ -0,0 +1,1078 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * USB Raw Gadget driver.
+ * See Documentation/usb/raw-gadget.rst for more details.
+ *
+ * Andrey Konovalov
+ */
+
+#include <linux/compiler.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/kref.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/semaphore.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <linux/usb.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/ch11.h>
+#include <linux/usb/gadget.h>
+
+#include <uapi/linux/usb/raw_gadget.h>
+
+#define DRIVER_DESC "USB Raw Gadget"
+#define DRIVER_NAME "raw-gadget"
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Andrey Konovalov");
+MODULE_LICENSE("GPL");
+
+/*----------------------------------------------------------------------*/
+
+#define RAW_EVENT_QUEUE_SIZE	16
+
+struct raw_event_queue {
+	/* See the comment in raw_event_queue_fetch() for locking details. */
+	spinlock_t		lock;
+	struct semaphore	sema;
+	struct usb_raw_event	*events[RAW_EVENT_QUEUE_SIZE];
+	int			size;
+};
+
+static void raw_event_queue_init(struct raw_event_queue *queue)
+{
+	spin_lock_init(&queue->lock);
+	sema_init(&queue->sema, 0);
+	queue->size = 0;
+}
+
+static int raw_event_queue_add(struct raw_event_queue *queue,
+	enum usb_raw_event_type type, size_t length, const void *data)
+{
+	unsigned long flags;
+	struct usb_raw_event *event;
+
+	spin_lock_irqsave(&queue->lock, flags);
+	if (WARN_ON(queue->size >= RAW_EVENT_QUEUE_SIZE)) {
+		spin_unlock_irqrestore(&queue->lock, flags);
+		return -ENOMEM;
+	}
+	event = kmalloc(sizeof(*event) + length, GFP_ATOMIC);
+	if (!event) {
+		spin_unlock_irqrestore(&queue->lock, flags);
+		return -ENOMEM;
+	}
+	event->type = type;
+	event->length = length;
+	if (event->length)
+		memcpy(&event->data[0], data, length);
+	queue->events[queue->size] = event;
+	queue->size++;
+	up(&queue->sema);
+	spin_unlock_irqrestore(&queue->lock, flags);
+	return 0;
+}
+
+static struct usb_raw_event *raw_event_queue_fetch(
+	struct raw_event_queue *queue)
+{
+	unsigned long flags;
+	struct usb_raw_event *event;
+
+	/*
+	 * This function can be called concurrently. We first check that
+	 * there's at least one event queued by decrementing the semaphore,
+	 * and then take the lock to protect queue struct fields.
+	 */
+	if (down_interruptible(&queue->sema))
+		return NULL;
+	spin_lock_irqsave(&queue->lock, flags);
+	if (WARN_ON(!queue->size)) {
+		spin_unlock_irqrestore(&queue->lock, flags);
+		return NULL;
+	}
+	event = queue->events[0];
+	queue->size--;
+	memmove(&queue->events[0], &queue->events[1],
+			queue->size * sizeof(queue->events[0]));
+	spin_unlock_irqrestore(&queue->lock, flags);
+	return event;
+}
+
+static void raw_event_queue_destroy(struct raw_event_queue *queue)
+{
+	int i;
+
+	for (i = 0; i < queue->size; i++)
+		kfree(queue->events[i]);
+	queue->size = 0;
+}
+
+/*----------------------------------------------------------------------*/
+
+struct raw_dev;
+
+#define USB_RAW_MAX_ENDPOINTS 32
+
+enum ep_state {
+	STATE_EP_DISABLED,
+	STATE_EP_ENABLED,
+};
+
+struct raw_ep {
+	struct raw_dev		*dev;
+	enum ep_state		state;
+	struct usb_ep		*ep;
+	struct usb_request	*req;
+	bool			urb_queued;
+	bool			disabling;
+	ssize_t			status;
+};
+
+enum dev_state {
+	STATE_DEV_INVALID = 0,
+	STATE_DEV_OPENED,
+	STATE_DEV_INITIALIZED,
+	STATE_DEV_RUNNING,
+	STATE_DEV_CLOSED,
+	STATE_DEV_FAILED
+};
+
+struct raw_dev {
+	struct kref			count;
+	spinlock_t			lock;
+
+	const char			*udc_name;
+	struct usb_gadget_driver	driver;
+
+	/* Reference to misc device: */
+	struct device			*dev;
+
+	/* Protected by lock: */
+	enum dev_state			state;
+	bool				gadget_registered;
+	struct usb_gadget		*gadget;
+	struct usb_request		*req;
+	bool				ep0_in_pending;
+	bool				ep0_out_pending;
+	bool				ep0_urb_queued;
+	ssize_t				ep0_status;
+	struct raw_ep			eps[USB_RAW_MAX_ENDPOINTS];
+
+	struct completion		ep0_done;
+	struct raw_event_queue		queue;
+};
+
+static struct raw_dev *dev_new(void)
+{
+	struct raw_dev *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+	/* Matches kref_put() in raw_release().
*/ + kref_init(&dev->count); + spin_lock_init(&dev->lock); + init_completion(&dev->ep0_done); + raw_event_queue_init(&dev->queue); + return dev; +} + +static void dev_free(struct kref *kref) +{ + struct raw_dev *dev = container_of(kref, struct raw_dev, count); + int i; + + kfree(dev->udc_name); + kfree(dev->driver.udc_name); + if (dev->req) { + if (dev->ep0_urb_queued) + usb_ep_dequeue(dev->gadget->ep0, dev->req); + usb_ep_free_request(dev->gadget->ep0, dev->req); + } + raw_event_queue_destroy(&dev->queue); + for (i = 0; i < USB_RAW_MAX_ENDPOINTS; i++) { + if (dev->eps[i].state != STATE_EP_ENABLED) + continue; + usb_ep_disable(dev->eps[i].ep); + usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); + kfree(dev->eps[i].ep->desc); + dev->eps[i].state = STATE_EP_DISABLED; + } + kfree(dev); +} + +/*----------------------------------------------------------------------*/ + +static int raw_queue_event(struct raw_dev *dev, + enum usb_raw_event_type type, size_t length, const void *data) +{ + int ret = 0; + unsigned long flags; + + ret = raw_event_queue_add(&dev->queue, type, length, data); + if (ret < 0) { + spin_lock_irqsave(&dev->lock, flags); + dev->state = STATE_DEV_FAILED; + spin_unlock_irqrestore(&dev->lock, flags); + } + return ret; +} + +static void gadget_ep0_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct raw_dev *dev = req->context; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (req->status) + dev->ep0_status = req->status; + else + dev->ep0_status = req->actual; + if (dev->ep0_in_pending) + dev->ep0_in_pending = false; + else + dev->ep0_out_pending = false; + spin_unlock_irqrestore(&dev->lock, flags); + + complete(&dev->ep0_done); +} + +static int gadget_bind(struct usb_gadget *gadget, + struct usb_gadget_driver *driver) +{ + int ret = 0; + struct raw_dev *dev = container_of(driver, struct raw_dev, driver); + struct usb_request *req; + unsigned long flags; + + if (strcmp(gadget->name, dev->udc_name) != 0) + return -ENODEV; + + set_gadget_data(gadget, dev); + req = usb_ep_alloc_request(gadget->ep0, GFP_KERNEL); + if (!req) { + dev_err(&gadget->dev, "usb_ep_alloc_request failed\n"); + set_gadget_data(gadget, NULL); + return -ENOMEM; + } + + spin_lock_irqsave(&dev->lock, flags); + dev->req = req; + dev->req->context = dev; + dev->req->complete = gadget_ep0_complete; + dev->gadget = gadget; + spin_unlock_irqrestore(&dev->lock, flags); + + /* Matches kref_put() in gadget_unbind(). */ + kref_get(&dev->count); + + ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL); + if (ret < 0) + dev_err(&gadget->dev, "failed to queue event\n"); + + return ret; +} + +static void gadget_unbind(struct usb_gadget *gadget) +{ + struct raw_dev *dev = get_gadget_data(gadget); + + set_gadget_data(gadget, NULL); + /* Matches kref_get() in gadget_bind(). 
*/ + kref_put(&dev->count, dev_free); +} + +static int gadget_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *ctrl) +{ + int ret = 0; + struct raw_dev *dev = get_gadget_data(gadget); + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_err(&gadget->dev, "ignoring, device is not running\n"); + ret = -ENODEV; + goto out_unlock; + } + if (dev->ep0_in_pending || dev->ep0_out_pending) { + dev_dbg(&gadget->dev, "stalling, request already pending\n"); + ret = -EBUSY; + goto out_unlock; + } + if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) + dev->ep0_in_pending = true; + else + dev->ep0_out_pending = true; + spin_unlock_irqrestore(&dev->lock, flags); + + ret = raw_queue_event(dev, USB_RAW_EVENT_CONTROL, sizeof(*ctrl), ctrl); + if (ret < 0) + dev_err(&gadget->dev, "failed to queue event\n"); + goto out; + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); +out: + return ret; +} + +/* These are currently unused but present in case UDC driver requires them. */ +static void gadget_disconnect(struct usb_gadget *gadget) { } +static void gadget_suspend(struct usb_gadget *gadget) { } +static void gadget_resume(struct usb_gadget *gadget) { } +static void gadget_reset(struct usb_gadget *gadget) { } + +/*----------------------------------------------------------------------*/ + +static struct miscdevice raw_misc_device; + +static int raw_open(struct inode *inode, struct file *fd) +{ + struct raw_dev *dev; + + /* Nonblocking I/O is not supported yet. */ + if (fd->f_flags & O_NONBLOCK) + return -EINVAL; + + dev = dev_new(); + if (!dev) + return -ENOMEM; + fd->private_data = dev; + dev->state = STATE_DEV_OPENED; + dev->dev = raw_misc_device.this_device; + return 0; +} + +static int raw_release(struct inode *inode, struct file *fd) +{ + int ret = 0; + struct raw_dev *dev = fd->private_data; + unsigned long flags; + bool unregister = false; + + spin_lock_irqsave(&dev->lock, flags); + dev->state = STATE_DEV_CLOSED; + if (!dev->gadget) { + spin_unlock_irqrestore(&dev->lock, flags); + goto out_put; + } + if (dev->gadget_registered) + unregister = true; + dev->gadget_registered = false; + spin_unlock_irqrestore(&dev->lock, flags); + + if (unregister) { + ret = usb_gadget_unregister_driver(&dev->driver); + if (ret != 0) + dev_err(dev->dev, + "usb_gadget_unregister_driver() failed with %d\n", + ret); + /* Matches kref_get() in raw_ioctl_run(). */ + kref_put(&dev->count, dev_free); + } + +out_put: + /* Matches dev_new() in raw_open(). 
+	 */
+	kref_put(&dev->count, dev_free);
+	return ret;
+}
+
+/*----------------------------------------------------------------------*/
+
+static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
+{
+	int ret = 0;
+	struct usb_raw_init arg;
+	char *udc_driver_name;
+	char *udc_device_name;
+	unsigned long flags;
+
+	if (copy_from_user(&arg, (void __user *)value, sizeof(arg)))
+		return -EFAULT;
+
+	switch (arg.speed) {
+	case USB_SPEED_UNKNOWN:
+		arg.speed = USB_SPEED_HIGH;
+		break;
+	case USB_SPEED_LOW:
+	case USB_SPEED_FULL:
+	case USB_SPEED_HIGH:
+	case USB_SPEED_SUPER:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL);
+	if (!udc_driver_name)
+		return -ENOMEM;
+	ret = strscpy(udc_driver_name, &arg.driver_name[0],
+			UDC_NAME_LENGTH_MAX);
+	if (ret < 0) {
+		kfree(udc_driver_name);
+		return ret;
+	}
+	ret = 0;
+
+	udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL);
+	if (!udc_device_name) {
+		kfree(udc_driver_name);
+		return -ENOMEM;
+	}
+	ret = strscpy(udc_device_name, &arg.device_name[0],
+			UDC_NAME_LENGTH_MAX);
+	if (ret < 0) {
+		kfree(udc_driver_name);
+		kfree(udc_device_name);
+		return ret;
+	}
+	ret = 0;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->state != STATE_DEV_OPENED) {
+		dev_dbg(dev->dev, "fail, device is not opened\n");
+		kfree(udc_driver_name);
+		kfree(udc_device_name);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	dev->udc_name = udc_driver_name;
+
+	dev->driver.function = DRIVER_DESC;
+	dev->driver.max_speed = arg.speed;
+	dev->driver.setup = gadget_setup;
+	dev->driver.disconnect = gadget_disconnect;
+	dev->driver.bind = gadget_bind;
+	dev->driver.unbind = gadget_unbind;
+	dev->driver.suspend = gadget_suspend;
+	dev->driver.resume = gadget_resume;
+	dev->driver.reset = gadget_reset;
+	dev->driver.driver.name = DRIVER_NAME;
+	dev->driver.udc_name = udc_device_name;
+	dev->driver.match_existing_only = 1;
+
+	dev->state = STATE_DEV_INITIALIZED;
+
+out_unlock:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return ret;
+}
+
+static int raw_ioctl_run(struct raw_dev *dev, unsigned long value)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	if (value)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->state != STATE_DEV_INITIALIZED) {
+		dev_dbg(dev->dev, "fail, device is not initialized\n");
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	ret = usb_gadget_probe_driver(&dev->driver);
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (ret) {
+		dev_err(dev->dev,
+				"fail, usb_gadget_probe_driver returned %d\n", ret);
+		dev->state = STATE_DEV_FAILED;
+		goto out_unlock;
+	}
+	dev->gadget_registered = true;
+	dev->state = STATE_DEV_RUNNING;
+	/* Matches kref_put() in raw_release(). */
+	kref_get(&dev->count);
+
+out_unlock:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return ret;
+}
+
+static int raw_ioctl_event_fetch(struct raw_dev *dev, unsigned long value)
+{
+	struct usb_raw_event arg;
+	unsigned long flags;
+	struct usb_raw_event *event;
+	uint32_t length;
+
+	if (copy_from_user(&arg, (void __user *)value, sizeof(arg)))
+		return -EFAULT;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->state != STATE_DEV_RUNNING) {
+		dev_dbg(dev->dev, "fail, device is not running\n");
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -EINVAL;
+	}
+	if (!dev->gadget) {
+		dev_dbg(dev->dev, "fail, gadget is not bound\n");
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -EBUSY;
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	event = raw_event_queue_fetch(&dev->queue);
+	if (!event) {
+		dev_dbg(&dev->gadget->dev, "event fetching interrupted\n");
+		return -EINTR;
+	}
+	length = min(arg.length, event->length);
+	if (copy_to_user((void __user *)value, event,
+				sizeof(*event) + length)) {
+		kfree(event);
+		return -EFAULT;
+	}
+	kfree(event);
+	return 0;
+}
+
+static void *raw_alloc_io_data(struct usb_raw_ep_io *io, void __user *ptr,
+	bool get_from_user)
+{
+	void *data;
+
+	if (copy_from_user(io, ptr, sizeof(*io)))
+		return ERR_PTR(-EFAULT);
+	if (io->ep >= USB_RAW_MAX_ENDPOINTS)
+		return ERR_PTR(-EINVAL);
+	if (!usb_raw_io_flags_valid(io->flags))
+		return ERR_PTR(-EINVAL);
+	if (io->length > PAGE_SIZE)
+		return ERR_PTR(-EINVAL);
+	if (get_from_user)
+		data = memdup_user(ptr + sizeof(*io), io->length);
+	else {
+		data = kmalloc(io->length, GFP_KERNEL);
+		if (!data)
+			data = ERR_PTR(-ENOMEM);
+	}
+	return data;
+}
+
+static int raw_process_ep0_io(struct raw_dev *dev, struct usb_raw_ep_io *io,
+	void *data, bool in)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->state != STATE_DEV_RUNNING) {
+		dev_dbg(dev->dev, "fail, device is not running\n");
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	if (!dev->gadget) {
+		dev_dbg(dev->dev, "fail, gadget is not bound\n");
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	if (dev->ep0_urb_queued) {
+		dev_dbg(&dev->gadget->dev, "fail, urb already queued\n");
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	if ((in && !dev->ep0_in_pending) ||
+			(!in && !dev->ep0_out_pending)) {
+		dev_dbg(&dev->gadget->dev, "fail, wrong direction\n");
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	if (WARN_ON(in && dev->ep0_out_pending)) {
+		ret = -ENODEV;
+		dev->state = STATE_DEV_FAILED;
+		goto out_done;
+	}
+	if (WARN_ON(!in && dev->ep0_in_pending)) {
+		ret = -ENODEV;
+		dev->state = STATE_DEV_FAILED;
+		goto out_done;
+	}
+
+	dev->req->buf = data;
+	dev->req->length = io->length;
+	dev->req->zero = usb_raw_io_flags_zero(io->flags);
+	dev->ep0_urb_queued = true;
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	ret = usb_ep_queue(dev->gadget->ep0, dev->req, GFP_KERNEL);
+	if (ret) {
+		dev_err(&dev->gadget->dev,
+				"fail, usb_ep_queue returned %d\n", ret);
+		spin_lock_irqsave(&dev->lock, flags);
+		dev->state = STATE_DEV_FAILED;
+		goto out_done;
+	}
+
+	ret = wait_for_completion_interruptible(&dev->ep0_done);
+	if (ret) {
+		dev_dbg(&dev->gadget->dev, "wait interrupted\n");
+		usb_ep_dequeue(dev->gadget->ep0, dev->req);
+		wait_for_completion(&dev->ep0_done);
+		spin_lock_irqsave(&dev->lock, flags);
+		goto out_done;
+	}
+
+	spin_lock_irqsave(&dev->lock, flags);
+	ret = dev->ep0_status;
+
+out_done:
+	dev->ep0_urb_queued = false;
+out_unlock:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return ret;
+}
+
+static int raw_ioctl_ep0_write(struct raw_dev *dev, unsigned long value)
+{
+	int ret = 0;
+	void *data;
+	struct usb_raw_ep_io io;
+
+	data = raw_alloc_io_data(&io, (void __user *)value, true);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+	ret = raw_process_ep0_io(dev, &io, data, true);
+	kfree(data);
+	return ret;
+}
+
+static int raw_ioctl_ep0_read(struct raw_dev *dev, unsigned long value)
+{
+	int ret = 0;
+	void *data;
+	struct usb_raw_ep_io io;
+	unsigned int length;
+
+	data = raw_alloc_io_data(&io, (void __user *)value, false);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+	ret = raw_process_ep0_io(dev, &io, data, false);
+	if (ret < 0) {
+		kfree(data);
+		return ret;
+	}
+	length = min(io.length, (unsigned int)ret);
+	if (copy_to_user((void __user *)(value + sizeof(io)), data, length))
+		ret = -EFAULT;
+	kfree(data);
+	return ret;
+}
+
+static bool check_ep_caps(struct usb_ep *ep,
+	struct usb_endpoint_descriptor *desc)
+{
+	switch (usb_endpoint_type(desc)) {
+	case USB_ENDPOINT_XFER_ISOC:
+		if (!ep->caps.type_iso)
+			return false;
+		break;
+	case USB_ENDPOINT_XFER_BULK:
+		if (!ep->caps.type_bulk)
+			return false;
+		break;
+	case USB_ENDPOINT_XFER_INT:
+		if (!ep->caps.type_int)
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	if (usb_endpoint_dir_in(desc) && !ep->caps.dir_in)
+		return false;
+	if (usb_endpoint_dir_out(desc) && !ep->caps.dir_out)
+		return false;
+
+	return true;
+}
+
+static int raw_ioctl_ep_enable(struct raw_dev *dev, unsigned long value)
+{
+	int ret = 0, i;
+	unsigned long flags;
+	struct usb_endpoint_descriptor *desc;
+	struct usb_ep *ep = NULL;
+
+	desc = memdup_user((void __user *)value, sizeof(*desc));
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	/*
+	 * Endpoints with a maxpacket length of 0 can cause crashes in UDC
+	 * drivers.
+	 */
+	if (usb_endpoint_maxp(desc) == 0) {
+		dev_dbg(dev->dev, "fail, bad endpoint maxpacket\n");
+		kfree(desc);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->state != STATE_DEV_RUNNING) {
+		dev_dbg(dev->dev, "fail, device is not running\n");
+		ret = -EINVAL;
+		goto out_free;
+	}
+	if (!dev->gadget) {
+		dev_dbg(dev->dev, "fail, gadget is not bound\n");
+		ret = -EBUSY;
+		goto out_free;
+	}
+
+	for (i = 0; i < USB_RAW_MAX_ENDPOINTS; i++) {
+		if (dev->eps[i].state == STATE_EP_ENABLED)
+			continue;
+		break;
+	}
+	if (i == USB_RAW_MAX_ENDPOINTS) {
+		dev_dbg(&dev->gadget->dev,
+				"fail, no device endpoints available\n");
+		ret = -EBUSY;
+		goto out_free;
+	}
+
+	gadget_for_each_ep(ep, dev->gadget) {
+		if (ep->enabled)
+			continue;
+		if (!check_ep_caps(ep, desc))
+			continue;
+		ep->desc = desc;
+		ret = usb_ep_enable(ep);
+		if (ret < 0) {
+			dev_err(&dev->gadget->dev,
+					"fail, usb_ep_enable returned %d\n", ret);
+			goto out_free;
+		}
+		dev->eps[i].req = usb_ep_alloc_request(ep, GFP_ATOMIC);
+		if (!dev->eps[i].req) {
+			dev_err(&dev->gadget->dev,
+					"fail, usb_ep_alloc_request failed\n");
+			usb_ep_disable(ep);
+			ret = -ENOMEM;
+			goto out_free;
+		}
+		dev->eps[i].ep = ep;
+		dev->eps[i].state = STATE_EP_ENABLED;
+		ep->driver_data = &dev->eps[i];
+		ret = i;
+		goto out_unlock;
+	}
+
+	dev_dbg(&dev->gadget->dev, "fail, no gadget endpoints available\n");
+	ret = -EBUSY;
+
+out_free:
+	kfree(desc);
+out_unlock:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return ret;
+}
+
+static int raw_ioctl_ep_disable(struct raw_dev *dev, unsigned long value)
+{
+	int ret = 0, i = value;
+	unsigned long flags;
+	const void *desc;
+
+	if (i < 0 || i >= USB_RAW_MAX_ENDPOINTS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->state != STATE_DEV_RUNNING) {
dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + if (dev->eps[i].state != STATE_EP_ENABLED) { + dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + if (dev->eps[i].disabling) { + dev_dbg(&dev->gadget->dev, + "fail, disable already in progress\n"); + ret = -EINVAL; + goto out_unlock; + } + if (dev->eps[i].urb_queued) { + dev_dbg(&dev->gadget->dev, + "fail, waiting for urb completion\n"); + ret = -EINVAL; + goto out_unlock; + } + dev->eps[i].disabling = true; + spin_unlock_irqrestore(&dev->lock, flags); + + usb_ep_disable(dev->eps[i].ep); + + spin_lock_irqsave(&dev->lock, flags); + usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); + desc = dev->eps[i].ep->desc; + dev->eps[i].ep = NULL; + dev->eps[i].state = STATE_EP_DISABLED; + kfree(desc); + dev->eps[i].disabling = false; + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static void gadget_ep_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct raw_ep *r_ep = (struct raw_ep *)ep->driver_data; + struct raw_dev *dev = r_ep->dev; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (req->status) + r_ep->status = req->status; + else + r_ep->status = req->actual; + spin_unlock_irqrestore(&dev->lock, flags); + + complete((struct completion *)req->context); +} + +static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io, + void *data, bool in) +{ + int ret = 0; + unsigned long flags; + struct raw_ep *ep = &dev->eps[io->ep]; + DECLARE_COMPLETION_ONSTACK(done); + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + if (ep->state != STATE_EP_ENABLED) { + dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); + ret = -EBUSY; + goto out_unlock; + } + if (ep->disabling) { + dev_dbg(&dev->gadget->dev, + "fail, endpoint is already being disabled\n"); + ret = -EBUSY; + goto out_unlock; + } + if (ep->urb_queued) { + dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); + ret = -EBUSY; + goto out_unlock; + } + if ((in && !ep->ep->caps.dir_in) || (!in && ep->ep->caps.dir_in)) { + dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); + ret = -EINVAL; + goto out_unlock; + } + + ep->dev = dev; + ep->req->context = &done; + ep->req->complete = gadget_ep_complete; + ep->req->buf = data; + ep->req->length = io->length; + ep->req->zero = usb_raw_io_flags_zero(io->flags); + ep->urb_queued = true; + spin_unlock_irqrestore(&dev->lock, flags); + + ret = usb_ep_queue(ep->ep, ep->req, GFP_KERNEL); + if (ret) { + dev_err(&dev->gadget->dev, + "fail, usb_ep_queue returned %d\n", ret); + spin_lock_irqsave(&dev->lock, flags); + dev->state = STATE_DEV_FAILED; + goto out_done; + } + + ret = wait_for_completion_interruptible(&done); + if (ret) { + dev_dbg(&dev->gadget->dev, "wait interrupted\n"); + usb_ep_dequeue(ep->ep, ep->req); + wait_for_completion(&done); + spin_lock_irqsave(&dev->lock, flags); + goto out_done; + } + + spin_lock_irqsave(&dev->lock, flags); + ret = ep->status; + +out_done: + ep->urb_queued = false; +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_ep_write(struct raw_dev *dev, 
unsigned long value) +{ + int ret = 0; + char *data; + struct usb_raw_ep_io io; + + data = raw_alloc_io_data(&io, (void __user *)value, true); + if (IS_ERR(data)) + return PTR_ERR(data); + ret = raw_process_ep_io(dev, &io, data, true); + kfree(data); + return ret; +} + +static int raw_ioctl_ep_read(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + char *data; + struct usb_raw_ep_io io; + unsigned int length; + + data = raw_alloc_io_data(&io, (void __user *)value, false); + if (IS_ERR(data)) + return PTR_ERR(data); + ret = raw_process_ep_io(dev, &io, data, false); + if (ret < 0) { + kfree(data); + return ret; + } + length = min(io.length, (unsigned int)ret); + ret = copy_to_user((void __user *)(value + sizeof(io)), data, length); + kfree(data); + return ret; +} + +static int raw_ioctl_configure(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + unsigned long flags; + + if (value) + return -EINVAL; + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + usb_gadget_set_state(dev->gadget, USB_STATE_CONFIGURED); + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int raw_ioctl_vbus_draw(struct raw_dev *dev, unsigned long value) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state != STATE_DEV_RUNNING) { + dev_dbg(dev->dev, "fail, device is not running\n"); + ret = -EINVAL; + goto out_unlock; + } + if (!dev->gadget) { + dev_dbg(dev->dev, "fail, gadget is not bound\n"); + ret = -EBUSY; + goto out_unlock; + } + usb_gadget_vbus_draw(dev->gadget, 2 * value); + +out_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static long raw_ioctl(struct file *fd, unsigned int cmd, unsigned long value) +{ + struct raw_dev *dev = fd->private_data; + int ret = 0; + + if (!dev) + return -EBUSY; + + switch (cmd) { + case USB_RAW_IOCTL_INIT: + ret = raw_ioctl_init(dev, value); + break; + case USB_RAW_IOCTL_RUN: + ret = raw_ioctl_run(dev, value); + break; + case USB_RAW_IOCTL_EVENT_FETCH: + ret = raw_ioctl_event_fetch(dev, value); + break; + case USB_RAW_IOCTL_EP0_WRITE: + ret = raw_ioctl_ep0_write(dev, value); + break; + case USB_RAW_IOCTL_EP0_READ: + ret = raw_ioctl_ep0_read(dev, value); + break; + case USB_RAW_IOCTL_EP_ENABLE: + ret = raw_ioctl_ep_enable(dev, value); + break; + case USB_RAW_IOCTL_EP_DISABLE: + ret = raw_ioctl_ep_disable(dev, value); + break; + case USB_RAW_IOCTL_EP_WRITE: + ret = raw_ioctl_ep_write(dev, value); + break; + case USB_RAW_IOCTL_EP_READ: + ret = raw_ioctl_ep_read(dev, value); + break; + case USB_RAW_IOCTL_CONFIGURE: + ret = raw_ioctl_configure(dev, value); + break; + case USB_RAW_IOCTL_VBUS_DRAW: + ret = raw_ioctl_vbus_draw(dev, value); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/*----------------------------------------------------------------------*/ + +static const struct file_operations raw_fops = { + .open = raw_open, + .unlocked_ioctl = raw_ioctl, + .compat_ioctl = raw_ioctl, + .release = raw_release, + .llseek = no_llseek, +}; + +static struct miscdevice raw_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = DRIVER_NAME, + .fops = &raw_fops, +}; + +module_misc_device(raw_misc_device); diff --git a/drivers/usb/gadget/udc/bdc/bdc_ep.c b/drivers/usb/gadget/udc/bdc/bdc_ep.c index 
bfd8f7ade9351b5f933525b90dd06d7ee547248d..be9f40bc9c12b93be4ca5797031d87b40a5fdcd2 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_ep.c +++ b/drivers/usb/gadget/udc/bdc/bdc_ep.c @@ -546,7 +546,7 @@ static void bdc_req_complete(struct bdc_ep *ep, struct bdc_req *req, { struct bdc *bdc = ep->bdc; - if (req == NULL || &req->queue == NULL || &req->usb_req == NULL) + if (req == NULL) return; dev_dbg(bdc->dev, "%s ep:%s status:%d\n", __func__, ep->name, status); diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c index 1f9941145746ee4d8b8130d4721e2fb2720fae54..feb73a1c42ef9ca554c3a5136045b37411a7eda5 100644 --- a/drivers/usb/gadget/udc/gr_udc.c +++ b/drivers/usb/gadget/udc/gr_udc.c @@ -2200,8 +2200,6 @@ static int gr_probe(struct platform_device *pdev) return -ENOMEM; } - spin_lock(&dev->lock); - /* Inside lock so that no gadget can use this udc until probe is done */ retval = usb_add_gadget_udc(dev->dev, &dev->gadget); if (retval) { @@ -2210,15 +2208,21 @@ static int gr_probe(struct platform_device *pdev) } dev->added = 1; + spin_lock(&dev->lock); + retval = gr_udc_init(dev); - if (retval) + if (retval) { + spin_unlock(&dev->lock); goto out; - - gr_dfs_create(dev); + } /* Clear all interrupt enables that might be left on since last boot */ gr_disable_interrupts_and_pullup(dev); + spin_unlock(&dev->lock); + + gr_dfs_create(dev); + retval = gr_request_irq(dev, dev->irq); if (retval) { dev_err(dev->dev, "Failed to request irq %d\n", dev->irq); @@ -2247,8 +2251,6 @@ static int gr_probe(struct platform_device *pdev) dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq); out: - spin_unlock(&dev->lock); - if (retval) gr_remove(pdev); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f567fec65b505d688f0497ec3f733121eaf8a851..e6ba86defeaf6015c25dd3d7efbc339f18458338 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1479,14 +1479,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Allow 3 retries for everything but isoc, set CErr = 3 */ if (!usb_endpoint_xfer_isoc(&ep->desc)) err_count = 3; - /* Some devices get this wrong */ - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH) - max_packet = 512; - - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_FULL - && max_packet < 8) - max_packet = 8; - + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */ + if (usb_endpoint_xfer_bulk(&ep->desc)) { + if (udev->speed == USB_SPEED_HIGH) + max_packet = 512; + if (udev->speed == USB_SPEED_FULL) { + max_packet = rounddown_pow_of_two(max_packet); + max_packet = clamp_val(max_packet, 8, 64); + } + } /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */ if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) avg_trb_len = 8; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 09f228279c01224285f44ea1aceb29d5a32c471c..c01a0d1e8b5ccb40b7b06a65fc96b7a93c2de90f 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -53,6 +53,7 @@ #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -139,7 +140,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_AMD_PLL_FIX; if (pdev->vendor == PCI_VENDOR_ID_AMD && - (pdev->device == 0x15e0 || + 
(pdev->device == 0x145c || + pdev->device == 0x15e0 || pdev->device == 0x15e1 || pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; @@ -191,7 +193,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && @@ -284,6 +287,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(pdev); + xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn); /* Find any debug ports */ @@ -344,9 +350,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; - if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_acpi_rtd3_enable(dev); - /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 85b3c924f25a8c0bab78bca0ceb7b1df549573e1..6102ac50d753e96d0aa65280421687bcec31d227 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -592,6 +592,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = &xhci_plat_pm_ops, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 82530947d1058b2d7eaf3d43465e9674f8e1cddb..0ece43c0cf112e0adf20280cc0249852956505b1 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1640,6 +1640,12 @@ static void handle_port_status(struct xhci_hcd *xhci, if ((major_revision == 0x03) != (hcd->speed >= HCD_USB3)) hcd = xhci->shared_hcd; + if (!hcd) { + xhci_dbg(xhci, "No hcd found for port %u event\n", port_id); + bogus_port_status = true; + goto cleanup; + } + if (major_revision == 0) { xhci_warn(xhci, "Event for port %u not in " "Extended Capabilities, ignoring.\n", diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 1eedf99969594b0d87c7344086d691f010af47a0..a98b5ab628f46c29a3406e3bc18d453a2faebb16 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -276,23 +276,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb, ), TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x", __entry->epnum, __entry->dir_in ? 
"in" : "out", - ({ char *s; - switch (__entry->type) { - case USB_ENDPOINT_XFER_INT: - s = "intr"; - break; - case USB_ENDPOINT_XFER_CONTROL: - s = "control"; - break; - case USB_ENDPOINT_XFER_BULK: - s = "bulk"; - break; - case USB_ENDPOINT_XFER_ISOC: - s = "isoc"; - break; - default: - s = "UNKNOWN"; - } s; }), __entry->urb, __entry->pipe, __entry->slot_id, + __print_symbolic(__entry->type, + { USB_ENDPOINT_XFER_INT, "intr" }, + { USB_ENDPOINT_XFER_CONTROL, "control" }, + { USB_ENDPOINT_XFER_BULK, "bulk" }, + { USB_ENDPOINT_XFER_ISOC, "isoc" }), + __entry->urb, __entry->pipe, __entry->slot_id, __entry->actual, __entry->length, __entry->num_mapped_sgs, __entry->num_sgs, __entry->stream, __entry->flags ) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 7f226cc3ef8a5953dbafbc1a08c60435aad636c7..1ec32e5aa004cb56fbc2bcacbc0e1aaa9dea55cb 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -32,6 +32,14 @@ #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* fuller speed iowarrior */ +#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 +#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 +#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 + +/* OEMed devices */ +#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a +#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS @@ -137,6 +145,11 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -364,6 +377,7 @@ static ssize_t iowarrior_write(struct file *file, } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: + case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: @@ -378,6 +392,10 @@ static ssize_t iowarrior_write(struct file *file, goto exit; break; case USB_DEVICE_ID_CODEMERCS_IOW56: + case USB_DEVICE_ID_CODEMERCS_IOW56AM: + case USB_DEVICE_ID_CODEMERCS_IOW28: + case USB_DEVICE_ID_CODEMERCS_IOW28L: + case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -502,6 +520,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { @@ -786,7 +805,11 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } - if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + 
(dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -799,7 +822,11 @@ static int iowarrior_probe(struct usb_interface *interface, /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56)) + ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index 3121fa31aabf93a64448d1852466e717e54e7f9e..a6f88442a53a92f6be78f35d766215aa6c87808f 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -1198,18 +1198,18 @@ static int sisusb_read_mem_bulk(struct sisusb_usb_data *sisusb, u32 addr, /* High level: Gfx (indexed) register access */ #ifdef INCL_SISUSB_CON -int sisusb_setreg(struct sisusb_usb_data *sisusb, int port, u8 data) +int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data) { return sisusb_write_memio_byte(sisusb, SISUSB_TYPE_IO, port, data); } -int sisusb_getreg(struct sisusb_usb_data *sisusb, int port, u8 *data) +int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 *data) { return sisusb_read_memio_byte(sisusb, SISUSB_TYPE_IO, port, data); } #endif -int sisusb_setidxreg(struct sisusb_usb_data *sisusb, int port, +int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 data) { int ret; @@ -1219,7 +1219,7 @@ int sisusb_setidxreg(struct sisusb_usb_data *sisusb, int port, return ret; } -int sisusb_getidxreg(struct sisusb_usb_data *sisusb, int port, +int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 *data) { int ret; @@ -1229,7 +1229,7 @@ int sisusb_getidxreg(struct sisusb_usb_data *sisusb, int port, return ret; } -int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, int port, u8 idx, +int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand, u8 myor) { int ret; @@ -1244,7 +1244,7 @@ int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, int port, u8 idx, } static int sisusb_setidxregmask(struct sisusb_usb_data *sisusb, - int port, u8 idx, u8 data, u8 mask) + u32 port, u8 idx, u8 data, u8 mask) { int ret; u8 tmp; @@ -1257,13 +1257,13 @@ static int sisusb_setidxregmask(struct sisusb_usb_data *sisusb, return ret; } -int sisusb_setidxregor(struct sisusb_usb_data *sisusb, int port, +int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 myor) { return sisusb_setidxregandor(sisusb, port, index, 0xff, myor); } -int sisusb_setidxregand(struct sisusb_usb_data *sisusb, int port, +int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand) { return sisusb_setidxregandor(sisusb, port, idx, myand, 0x00); @@ -2786,8 +2786,8 @@ static loff_t sisusb_lseek(struct file *file, loff_t offset, int orig) static int sisusb_handle_command(struct 
sisusb_usb_data *sisusb, struct sisusb_command *y, unsigned long arg) { - int retval, port, length; - u32 address; + int retval, length; + u32 port, address; /* All our commands require the device * to be initialized. diff --git a/drivers/usb/misc/sisusbvga/sisusb_init.h b/drivers/usb/misc/sisusbvga/sisusb_init.h index e79a616f0d26ca6d33db4d9504dff49d45aaad33..f7182257f7e1e81d92da4acbdd9ae12509459117 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_init.h +++ b/drivers/usb/misc/sisusbvga/sisusb_init.h @@ -811,17 +811,17 @@ static const struct SiS_VCLKData SiSUSB_VCLKData[] = { int SiSUSBSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo); int SiSUSBSetVESAMode(struct SiS_Private *SiS_Pr, unsigned short VModeNo); -extern int sisusb_setreg(struct sisusb_usb_data *sisusb, int port, u8 data); -extern int sisusb_getreg(struct sisusb_usb_data *sisusb, int port, u8 * data); -extern int sisusb_setidxreg(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data); +extern int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 * data); +extern int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 data); -extern int sisusb_getidxreg(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 * data); -extern int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand, u8 myor); -extern int sisusb_setidxregor(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port, u8 index, u8 myor); -extern int sisusb_setidxregand(struct sisusb_usb_data *sisusb, int port, +extern int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port, u8 idx, u8 myand); void sisusb_delete(struct kref *kref); diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 802388bb42ba707ee4432456aaaf1718af151d9f..3ec0752e67ac6fd8a58f67c97842cff46fc72d59 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -1494,10 +1494,7 @@ void musb_host_tx(struct musb *musb, u8 epnum) * We need to map sg if the transfer_buffer is * NULL. 
*/ - if (!urb->transfer_buffer) - qh->use_sg = true; - - if (qh->use_sg) { + if (!urb->transfer_buffer) { /* sg_miter_start is already done in musb_ep_program */ if (!sg_miter_next(&qh->sg_miter)) { dev_err(musb->controller, "error: sg list empty\n"); @@ -1505,9 +1502,8 @@ void musb_host_tx(struct musb *musb, u8 epnum) status = -EINVAL; goto done; } - urb->transfer_buffer = qh->sg_miter.addr; length = min_t(u32, length, qh->sg_miter.length); - musb_write_fifo(hw_ep, length, urb->transfer_buffer); + musb_write_fifo(hw_ep, length, qh->sg_miter.addr); qh->sg_miter.consumed = length; sg_miter_stop(&qh->sg_miter); } else { @@ -1516,11 +1512,6 @@ void musb_host_tx(struct musb *musb, u8 epnum) qh->segsize = length; - if (qh->use_sg) { - if (offset + length >= urb->transfer_buffer_length) - qh->use_sg = false; - } - musb_ep_select(mbase, epnum); musb_writew(epio, MUSB_TXCSR, MUSB_TXCSR_H_WZC_BITS | MUSB_TXCSR_TXPKTRDY); @@ -2038,8 +2029,10 @@ void musb_host_rx(struct musb *musb, u8 epnum) urb->actual_length += xfer_len; qh->offset += xfer_len; if (done) { - if (qh->use_sg) + if (qh->use_sg) { qh->use_sg = false; + urb->transfer_buffer = NULL; + } if (urb->status == -EINPROGRESS) urb->status = status; diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 456f3e6ecf034b31cee97f734753055524c38fa2..26e69c2766f560e17930b7b4d11ddb77d3ef4af6 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -388,8 +388,6 @@ static const struct musb_platform_ops omap2430_ops = { .init = omap2430_musb_init, .exit = omap2430_musb_exit, - .set_vbus = omap2430_musb_set_vbus, - .enable = omap2430_musb_enable, .disable = omap2430_musb_disable, diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 3705b64ab948323740aaba443e8079ec5b1d8c97..45d5e5c899e1ff55128a3a9ffeaef029264ea236 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -714,7 +714,7 @@ static void edge_interrupt_callback(struct urb *urb) /* grab the txcredits for the ports if available */ position = 2; portNumber = 0; - while ((position < length) && + while ((position < length - 1) && (portNumber < edge_serial->serial->num_ports)) { txCredits = data[position] | (data[position+1] << 8); if (txCredits) { diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index eff353de47cd5b60a3b48d547eda970d48e84ff9..3621bde2a0ed7b064ac8069d953b8be0048cfb6b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1186,6 +1186,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */ .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110b, 0xff), /* Telit ME910G1 (ECM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), @@ -1992,8 +1994,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ + { USB_DEVICE_INTERFACE_CLASS(0x1435, 0xd191, 0xff), /* Wistron Neweb D19Q1 */ + .driver_info = RSVD(1) | RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x1690, 0x7588, 0xff), /* ASKEY WWHC050 
*/ + .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2033, 0xff), /* BroadMobi BM806U */ + .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2060, 0xff), /* BroadMobi BM818 */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 1c4d3dbd4635d4f1e1ce549946f62bd0fcdaa3a0..5051b1dad09eeb166ecb061dd12867995841d435 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -96,6 +96,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD381_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index fcc2cfc1da28d2d99b00645d20baa2085dd56534..f0a9eeb6272de7661baaa6d4b29dffb9308a24a9 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -129,6 +129,7 @@ #define HP_LM920_PRODUCT_ID 0x026b #define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 +#define HP_LD381_PRODUCT_ID 0x0f7f #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 8391a88cf90f3446c5de267067fa0978dd72d883..20dd8df864c45ab75c056de8bcdd10beaa2f3454 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -46,6 +46,7 @@ struct uas_dev_info { struct scsi_cmnd *cmnd[MAX_CMNDS]; spinlock_t lock; struct work_struct work; + struct work_struct scan_work; /* for async scanning */ }; enum { @@ -81,6 +82,19 @@ static void uas_free_streams(struct uas_dev_info *devinfo); static void uas_log_cmd_state(struct scsi_cmnd *cmnd, const char *prefix, int status); +/* + * This driver needs its own workqueue, as we need to control memory allocation. + * + * In the course of error handling and power management uas_wait_for_pending_cmnds() + * needs to flush pending work items. In these contexts we cannot allocate memory + * by doing block IO as we would deadlock. For the same reason we cannot wait + * for anything allocating memory not heeding these constraints. + * + * So we have to control all work items that can be on the workqueue we flush. + * Hence we cannot share a queue and need our own. 
+ */ +static struct workqueue_struct *workqueue; + static void uas_do_work(struct work_struct *work) { struct uas_dev_info *devinfo = @@ -109,12 +123,23 @@ static void uas_do_work(struct work_struct *work) if (!err) cmdinfo->state &= ~IS_IN_WORK_LIST; else - schedule_work(&devinfo->work); + queue_work(workqueue, &devinfo->work); } out: spin_unlock_irqrestore(&devinfo->lock, flags); } +static void uas_scan_work(struct work_struct *work) +{ + struct uas_dev_info *devinfo = + container_of(work, struct uas_dev_info, scan_work); + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf); + + dev_dbg(&devinfo->intf->dev, "starting scan\n"); + scsi_scan_host(shost); + dev_dbg(&devinfo->intf->dev, "scan complete\n"); +} + static void uas_add_work(struct uas_cmd_info *cmdinfo) { struct scsi_pointer *scp = (void *)cmdinfo; @@ -123,7 +148,7 @@ static void uas_add_work(struct uas_cmd_info *cmdinfo) lockdep_assert_held(&devinfo->lock); cmdinfo->state |= IS_IN_WORK_LIST; - schedule_work(&devinfo->work); + queue_work(workqueue, &devinfo->work); } static void uas_zap_pending(struct uas_dev_info *devinfo, int result) @@ -179,6 +204,9 @@ static void uas_log_cmd_state(struct scsi_cmnd *cmnd, const char *prefix, struct uas_cmd_info *ci = (void *)&cmnd->SCp; struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; + if (status == -ENODEV) /* too late */ + return; + scmd_printk(KERN_INFO, cmnd, "%s %d uas-tag %d inflight:%s%s%s%s%s%s%s%s%s%s%s%s ", prefix, status, cmdinfo->uas_tag, @@ -989,6 +1017,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) init_usb_anchor(&devinfo->data_urbs); spin_lock_init(&devinfo->lock); INIT_WORK(&devinfo->work, uas_do_work); + INIT_WORK(&devinfo->scan_work, uas_scan_work); result = uas_configure_endpoints(devinfo); if (result) @@ -1005,7 +1034,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto free_streams; - scsi_scan_host(shost); + /* Submit the delayed_work for SCSI-device scanning */ + schedule_work(&devinfo->scan_work); + return result; free_streams: @@ -1173,6 +1204,12 @@ static void uas_disconnect(struct usb_interface *intf) usb_kill_anchored_urbs(&devinfo->data_urbs); uas_zap_pending(devinfo, DID_NO_CONNECT); + /* + * Prevent SCSI scanning (if it hasn't started yet) + * or wait for the SCSI-scanning routine to stop. 
+ */ + cancel_work_sync(&devinfo->scan_work); + scsi_remove_host(shost); uas_free_streams(devinfo); scsi_host_put(shost); @@ -1212,7 +1249,31 @@ static struct usb_driver uas_driver = { .id_table = uas_usb_ids, }; -module_usb_driver(uas_driver); +static int __init uas_init(void) +{ + int rv; + + workqueue = alloc_workqueue("uas", WQ_MEM_RECLAIM, 0); + if (!workqueue) + return -ENOMEM; + + rv = usb_register(&uas_driver); + if (rv) { + destroy_workqueue(workqueue); + return -ENOMEM; + } + + return 0; +} + +static void __exit uas_exit(void) +{ + usb_deregister(&uas_driver); + destroy_workqueue(workqueue); +} + +module_init(uas_init); +module_exit(uas_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR( diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index fb69cb64f7d457a097541d8c54bc6ec4c46eabcb..5c3f2eaf59e8c2f641d3f783006b8280040d59e5 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1277,6 +1277,12 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100, + "Samsung", + "Flash Drive FIT", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", @@ -2336,6 +2342,13 @@ UNUSUAL_DEV( 0x3340, 0xffff, 0x0000, 0x0000, USB_SC_DEVICE,USB_PR_DEVICE,NULL, US_FL_MAX_SECTORS_64 ), +/* Reported by Cyril Roelandt */ +UNUSUAL_DEV( 0x357d, 0x7788, 0x0114, 0x0114, + "JMicron", + "USB to ATA/ATAPI Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BROKEN_FUA ), + /* Reported by Andrey Rahmatullin */ UNUSUAL_DEV( 0x4102, 0x1020, 0x0100, 0x0100, "iRiver", diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 8a00d80e415de3cad03c0fdad6dd1943b6834171..2c68fa5b02219dc4295c1a0ac9e86040d701dfaf 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -380,8 +380,8 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, vma = find_vma_intersection(mm, vaddr, vaddr + 1); if (vma && vma->vm_flags & VM_PFNMAP) { - *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - if (is_invalid_reserved_pfn(*pfn)) + if (!follow_pfn(vma, vaddr, pfn) && + is_invalid_reserved_pfn(*pfn)) ret = 0; } @@ -593,7 +593,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, continue; } - remote_vaddr = dma->vaddr + iova - dma->iova; + remote_vaddr = dma->vaddr + (iova - dma->iova); ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], do_accounting); if (ret) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4d11152e60c155007a18b0d390c931d87588fc8c..8fe07622ae59ef5b863419554b314ef145832526 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1025,11 +1025,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) static struct socket *get_raw_socket(int fd) { - struct { - struct sockaddr_ll sa; - char buf[MAX_ADDR_LEN]; - } uaddr; - int uaddr_len = sizeof uaddr, r; + int r; struct socket *sock = sockfd_lookup(fd, &r); if (!sock) @@ -1041,12 +1037,7 @@ static struct socket *get_raw_socket(int fd) goto err; } - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, - &uaddr_len, 0); - if (r) - goto err; - - if (uaddr.sa.sll_family != AF_PACKET) { + if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6391dc5b0ebe89de2d0cf3a676deb3575d395b86..834e88e20550f2c1729946447dea1ee033e41b7d 100644 --- 
a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -499,6 +499,11 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) mutex_unlock(&vq->mutex); } + /* Some packets may have been queued before the device was started, + * let's kick the send worker to send them. + */ + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + mutex_unlock(&vsock->dev.mutex); return 0; diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index f09e17b60e45ef611f51e2e21528432731fac8f0..0fdd6761d6c31c1b43c6a58ea7b3a1bf49db9f57 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1310,6 +1310,9 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font) static int vgacon_resize(struct vc_data *c, unsigned int width, unsigned int height, unsigned int user) { + if ((width << 1) * height > vga_vram_size) + return -EINVAL; + if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ c->vc_font.height) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 553763a637501401cd69cd035aa5a8f1773de0d5..2f946cdd9945a958f9c9eb7e422ecb273886db46 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -1221,6 +1221,9 @@ static void fbcon_deinit(struct vc_data *vc) if (!con_is_bound(&fb_con)) fbcon_exit(); + if (vc->vc_num == logo_shown) + logo_shown = FBCON_LOGO_CANSHOW; + return; } diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index b5b66b288bc9fc2efbfbb9e51a0bd4812b1e97d5..4e2c6c8e549a9390cc9c2b9ded3cd3c34def8b19 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1148,7 +1148,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, case FBIOGET_FSCREENINFO: if (!lock_fb_info(info)) return -ENODEV; - fix = info->fix; + memcpy(&fix, &info->fix, sizeof(fix)); unlock_fb_info(info); ret = copy_to_user(argp, &fix, sizeof(fix)) ? 
-EFAULT : 0; diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index d059d04c63acd7bc118bbc0ec02fc3cb1dfa41f8..20195d3dbf088c87195c2976f09acefb1f5895da 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -769,8 +769,8 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_clk: clk_disable_unprepare(fbi->clk); failed_free_fbmem: - dma_free_coherent(fbi->dev, info->fix.smem_len, - info->screen_base, fbi->fb_start_dma); + dma_free_wc(fbi->dev, info->fix.smem_len, + info->screen_base, fbi->fb_start_dma); failed_free_info: kfree(info); @@ -804,7 +804,7 @@ static int pxa168fb_remove(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); - dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len), + dma_free_wc(fbi->dev, info->fix.smem_len, info->screen_base, info->fix.smem_start); clk_disable_unprepare(fbi->clk); diff --git a/drivers/video/fbdev/sis/init301.c b/drivers/video/fbdev/sis/init301.c index 1ec9c3e0e1d85092f4e06b3a2db04d9836e36a1f..f23a381442d3dc090f6bd1cb289a206aec3120dc 100644 --- a/drivers/video/fbdev/sis/init301.c +++ b/drivers/video/fbdev/sis/init301.c @@ -522,9 +522,7 @@ SiS_PanelDelay(struct SiS_Private *SiS_Pr, unsigned short DelayTime) SiS_DDC2Delay(SiS_Pr, 0x4000); } - } else if((SiS_Pr->SiS_IF_DEF_LVDS == 1) /* || - (SiS_Pr->SiS_CustomT == CUT_COMPAQ1280) || - (SiS_Pr->SiS_CustomT == CUT_CLEVO1400) */ ) { /* 315 series, LVDS; Special */ + } else if (SiS_Pr->SiS_IF_DEF_LVDS == 1) { /* 315 series, LVDS; Special */ if(SiS_Pr->SiS_IF_DEF_CH70xx == 0) { PanelID = SiS_GetReg(SiS_Pr->SiS_P3d4,0x36); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 499531608fa26813c39e046f8b6d6e0b94ccb34d..71970773aad1375f796a17543b9343dd09408be1 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -132,6 +132,8 @@ static void set_page_pfns(struct virtio_balloon *vb, { unsigned int i; + BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX); + /* * Set balloon pfns pointing at this page. * Note that the first pfn points at start of the page. 
diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c index 30b3acc9383309509862e0d1b942c22bc82a5361..e81ec763b5555026f9bcc395d620199f9fd58079 100644 --- a/drivers/vme/bridges/vme_fake.c +++ b/drivers/vme/bridges/vme_fake.c @@ -418,8 +418,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr, } } -static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u8 retval = 0xff; int i; @@ -450,8 +451,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u16 retval = 0xffff; int i; @@ -482,8 +484,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u32 retval = 0xffffffff; int i; @@ -613,8 +616,9 @@ static ssize_t fake_master_read(struct vme_master_resource *image, void *buf, return retval; } -static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite8(struct fake_driver *bridge, + u8 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -643,8 +647,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, } -static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge, + u16 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -673,8 +678,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, } -static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge, + u32 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index 9083d3d922b0b4304a3623b307c3b6a66e99f2ef..79383ff620199c20db7d97f61366a24d499d3760 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -126,13 +126,6 @@ static int da9062_wdt_stop(struct watchdog_device *wdd) struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); int ret; - ret = da9062_reset_watchdog_timer(wdt); - if (ret) { - dev_err(wdt->hw->dev, "Failed to ping the watchdog (err = %d)\n", - ret); - return ret; - } - ret = regmap_update_bits(wdt->hw->regmap, DA9062AA_CONTROL_D, DA9062AA_TWDSCALE_MASK, diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index b30fb637ae947885e04eba1819e7d8cd145764d1..52e03f1c76e383e1d70675a6f4057144af34de0b 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -245,6 +245,7 @@ static int watchdog_start(struct watchdog_device *wdd) if (err == 0) { set_bit(WDOG_ACTIVE, &wdd->status); 
wd_data->last_keepalive = started_at; + wd_data->last_hw_keepalive = started_at; watchdog_update_worker(wdd); } diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 0da9943d405f8ff89efc87a0cf6df5ff43df9d6f..c310e841561cbb1b15d03e498a764f25e1b9e1c5 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width - 1; + r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index 08cb419eb4e63c90f30ea7cdb541d31c14da2391..5f6b77ea34fb5b5ddef4d4fb5e3ba722194ded6f 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -37,7 +37,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void) * cpu. */ __this_cpu_write(xen_in_preemptible_hcall, false); - _cond_resched(); + local_irq_enable(); + cond_resched(); + local_irq_disable(); __this_cpu_write(xen_in_preemptible_hcall, true); } } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index a1c17000129ba1cb4465df0bc679a0756f8937a3..e94a61eaeceb08a0172d50a3b8070f0c72ce9705 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -450,7 +450,14 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, unsigned int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); + int err; + + err = ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); + /* Some hypervisors are buggy and can return 1. */ + if (err > 0) + err = GNTST_general_error; + + return err; } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index d239fc3c5e3ded66ad28194fcdc7d09a6df801da..eb5151fc8efab02899ce764b99a8d6f2955e9ca7 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -313,6 +313,8 @@ static int process_msg(void) req->msg.type = state.msg.type; req->msg.len = state.msg.len; req->body = state.body; + /* write body, then update state */ + virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); } else @@ -395,6 +397,8 @@ static int process_writes(void) if (state.req->state == xb_req_state_aborted) kfree(state.req); else { + /* write err, then update state */ + virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3f3b29398ab8e2b711ce724cf756ea8c241433cb..b609c6e08796e45bebdc1fbb79f5f97c892a66c6 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -188,8 +188,11 @@ static bool xenbus_ok(void) static bool test_reply(struct xb_req_data *req) { - if (req->state == xb_req_state_got_reply || !xenbus_ok()) + if (req->state == xb_req_state_got_reply || !xenbus_ok()) { + /* read req->state before all other fields */ + virt_rmb(); return true; + } /* Make sure to reread req->state each time. 
*/ barrier(); @@ -199,7 +202,7 @@ static bool test_reply(struct xb_req_data *req) static void *read_reply(struct xb_req_data *req) { - while (req->state != xb_req_state_got_reply) { + do { wait_event(req->wq, test_reply(req)); if (!xenbus_ok()) @@ -213,7 +216,7 @@ static void *read_reply(struct xb_req_data *req) if (req->err) return ERR_PTR(req->err); - } + } while (req->state != xb_req_state_got_reply); return req->body; } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ccc9c708a860a5d5c405a7198e9a29110f4dc390..7dc9c78a1c31cac07c13f5516a9cc88b4861b2a6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -173,7 +173,7 @@ void afs_put_call(struct afs_call *call) int n = atomic_dec_return(&call->usage); int o = atomic_read(&afs_outstanding_calls); - trace_afs_call(call, afs_call_trace_put, n + 1, o, + trace_afs_call(call, afs_call_trace_put, n, o, __builtin_return_address(0)); ASSERTCMP(n, >=, 0); @@ -619,7 +619,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, u = __atomic_add_unless(&call->usage, 1, 0); if (u != 0) { - trace_afs_call(call, afs_call_trace_wake, u, + trace_afs_call(call, afs_call_trace_wake, u + 1, atomic_read(&afs_outstanding_calls), __builtin_return_address(0)); diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 72d7589072f523cf283b27b053aef31cc399d130..92615badc17341f3749577d465fd2fdb4d9aa3d5 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -447,3 +447,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work) { set_bit(WORK_HIGH_PRIO_BIT, &work->flags); } + +void btrfs_flush_workqueue(struct btrfs_workqueue *wq) +{ + if (wq->high) + flush_workqueue(wq->high->normal_wq); + + flush_workqueue(wq->normal->normal_wq); +} diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index fc957e00cef144385e8368a44cfc418b412eadc0..2a25aef6ef2a5a86cfb71718ce672635292081d9 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -85,4 +85,6 @@ void btrfs_set_work_high_priority(struct btrfs_work *work); struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work); struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq); bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq); +void btrfs_flush_workqueue(struct btrfs_workqueue *wq); + #endif diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 7d5a9b51f0d7a81fa7d5adf9900e4fcc744a80c0..4be07cf31d74cc9c1e7303dbdeec1e8f2bbb4d6a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -642,7 +642,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - struct btrfs_fs_info *fs_info = state->fs_info; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -713,7 +712,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, break; } - num_copies = btrfs_num_copies(fs_info, next_bytenr, + num_copies = btrfs_num_copies(state->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) pr_info("num_copies(log_bytenr=%llu) = %d\n", diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 87414fc9e268f98f9b6df4f470c1185793512998..416fb50a5378c5fd74b2438275f13363f9b2830e 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -18,6 +18,7 @@ */ #include <linux/slab.h> +#include <linux/sched/mm.h> #include
"delayed-inode.h" #include "disk-io.h" #include "transaction.h" @@ -833,11 +834,14 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; + unsigned int nofs_flag; char *ptr; int ret; + nofs_flag = memalloc_nofs_save(); ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key, delayed_item->data_len); + memalloc_nofs_restore(nofs_flag); if (ret < 0 && ret != -EEXIST) return ret; @@ -966,6 +970,7 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_delayed_node *node) { struct btrfs_delayed_item *curr, *prev; + unsigned int nofs_flag; int ret = 0; do_again: @@ -974,7 +979,9 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans, if (!curr) goto delete_fail; + nofs_flag = memalloc_nofs_save(); ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1); + memalloc_nofs_restore(nofs_flag); if (ret < 0) goto delete_fail; else if (ret > 0) { @@ -1041,6 +1048,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; + unsigned int nofs_flag; int mod; int ret; @@ -1053,7 +1061,9 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, else mod = 1; + nofs_flag = memalloc_nofs_save(); ret = btrfs_lookup_inode(trans, root, path, &key, mod); + memalloc_nofs_restore(nofs_flag); if (ret > 0) { btrfs_release_path(path); return -ENOENT; @@ -1104,7 +1114,10 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, key.type = BTRFS_INODE_EXTREF_KEY; key.offset = -1; + + nofs_flag = memalloc_nofs_save(); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + memalloc_nofs_restore(nofs_flag); if (ret < 0) goto err_out; ASSERT(ret); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 44b15617c7b9ff998356b0463830ecfcfaab0a44..096c015b22a46750c0374e22b0d08a518a37c845 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2913,6 +2913,7 @@ int open_ctree(struct super_block *sb, /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { + btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3724,6 +3725,19 @@ void close_ctree(struct btrfs_fs_info *fs_info) */ btrfs_delete_unused_bgs(fs_info); + /* + * There might be existing delayed inode workers still running + * and holding an empty delayed inode item. We must wait for + * them to complete first because they can create a transaction. + * This happens when someone calls btrfs_balance_delayed_items() + * and then a transaction commit runs the same delayed nodes + * before any delayed worker has done something with the nodes. + * We must wait for any worker here and not at transaction + * commit time since that could cause a deadlock. + * This is a very rare case. 
+ */ + btrfs_flush_workqueue(fs_info->delayed_workers); + ret = btrfs_commit_super(fs_info); if (ret) btrfs_err(fs_info, "commit super ret %d", ret); @@ -4393,7 +4407,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, wake_up(&fs_info->transaction_wait); btrfs_destroy_delayed_inodes(fs_info); - btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fd15f396b3a0951979acde7146415b84f5c95308..51e26f90f0bb28da6a064815ce9554834cc43cec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10554,7 +10554,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out; + goto out_put_group; } /* @@ -10591,7 +10591,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out; + goto out_put_group; } clear_nlink(inode); /* One for the block groups ref */ @@ -10614,13 +10614,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out; + goto out_put_group; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out; + goto out_put_group; btrfs_release_path(path); } @@ -10778,9 +10778,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = remove_block_group_free_space(trans, fs_info, block_group); if (ret) - goto out; + goto out_put_group; - btrfs_put_block_group(block_group); + /* Once for the block groups rbtree */ btrfs_put_block_group(block_group); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -10790,6 +10790,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; ret = btrfs_del_item(trans, root, path); + +out_put_group: + /* Once for the lookup reference */ + btrfs_put_block_group(block_group); out: btrfs_free_path(path); return ret; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2e348fb0b280d64ae158a28713e33a8d48f0598d..c87d673ce3347c0cb755f0abeb8fefb00af7c835 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -228,6 +228,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) struct extent_map *merge = NULL; struct rb_node *rb; + /* + * We can't modify an extent map that is in the tree and that is being + * used by another task, as it can cause that other task to see it in + * inconsistent state during the merging. We always have 1 reference for + * the tree and 1 for this task (which is unpinning the extent map or + * clearing the logging flag), so anything > 2 means it's being used by + * other tasks too. 
+ */ + if (refcount_read(&em->refs) > 2) + return; + if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 702b3606ad0ec4818bf475fdef29e3220b7861c3..717d82d51bb13c619b66c3e3d967b33166d7b77a 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -288,7 +288,8 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio csum += count * csum_size; nblocks -= count; next: - while (count--) { + while (count > 0) { + count--; disk_bytenr += fs_info->sectorsize; offset += fs_info->sectorsize; page_bytes_left -= fs_info->sectorsize; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index abecc4724a3bc931ca4d4c4d8c984e58c4bcf53f..2a196bb134d9febda7f7d31eea396dfab4531f5b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10639,6 +10639,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 clear_offset = start; u64 i_size; u64 cur_bytes; u64 last_alloc = (u64)-1; @@ -10673,6 +10674,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans); break; } + + /* + * We've reserved this space, and thus converted it from + * ->bytes_may_use to ->bytes_reserved. Any error that happens + * from here on out we will only need to clear our reservation + * for the remaining unreserved area, so advance our + * clear_offset by our extent size. + */ + clear_offset += ins.offset; btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; @@ -10753,9 +10763,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, if (own_trans) btrfs_end_transaction(trans); } - if (cur_offset < end) - btrfs_free_reserved_data_space(inode, NULL, cur_offset, - end - cur_offset + 1); + if (clear_offset < end) + btrfs_free_reserved_data_space(inode, NULL, clear_offset, + end - clear_offset + 1); return ret; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a3aca495e33e2c97890551b409afd1ceb40b4f69..d2287ea9fc50eef189894e5d15a94c5460667a7c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -838,10 +838,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; + /* + * If the ordered extent had an error save the error but don't + * exit without waiting first for all other ordered extents in + * the range to complete. 
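The small fs/btrfs/file-item.c change above looks cosmetic but is not: with an unsigned counter, "while (count--)" exits with the counter wrapped around to UINT_MAX (a signed counter is left at the similarly stale -1), which is a trap if the value is consulted again after the loop; the rewritten form leaves it at 0. A compilable demonstration:

#include <stdio.h>

int main(void)
{
        unsigned int count = 3;

        while (count--)
                ;       /* body runs 3 times */
        printf("after while (count--): %u\n", count);   /* 4294967295 */

        count = 3;
        while (count > 0) {
                count--;
                /* body runs 3 times */
        }
        printf("after while (count > 0): %u\n", count); /* 0 */
        return 0;
}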
+ */ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) ret = -EIO; btrfs_put_ordered_extent(ordered); - if (ret || end == 0 || end == start) + if (end == 0 || end == start) break; end--; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d4c00edd16d2b20efe278804d90e4730df742148..f4397dd19583bab10693ad39d6d7aa68bba7ad92 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -538,8 +538,8 @@ static int should_ignore_root(struct btrfs_root *root) if (!reloc_root) return 0; - if (btrfs_root_last_snapshot(&reloc_root->root_item) == - root->fs_info->running_transaction->transid - 1) + if (btrfs_header_generation(reloc_root->commit_root) == + root->fs_info->running_transaction->transid) return 0; /* * if there is reloc tree and it was created in previous @@ -1194,7 +1194,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, free_backref_node(cache, lower); } - free_backref_node(cache, node); + remove_backref_node(cache, node); return ERR_PTR(err); } ASSERT(!node || !node->detached); @@ -1306,7 +1306,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) if (!node) return -ENOMEM; - node->bytenr = root->node->start; + node->bytenr = root->commit_root->start; node->data = root; spin_lock(&rc->reloc_root_tree.lock); @@ -1337,10 +1337,11 @@ static void __del_reloc_root(struct btrfs_root *root) if (rc && root->node) { spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); + root->commit_root->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + RB_CLEAR_NODE(&node->rb_node); } spin_unlock(&rc->reloc_root_tree.lock); if (!node) @@ -1358,7 +1359,7 @@ static void __del_reloc_root(struct btrfs_root *root) * helper to update the 'address of tree root -> reloc tree' * mapping */ -static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) +static int __update_reloc_root(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *rb_node; @@ -1367,7 +1368,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); + root->commit_root->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); @@ -1379,7 +1380,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) BUG_ON((struct btrfs_root *)node->data != root); spin_lock(&rc->reloc_root_tree.lock); - node->bytenr = new_bytenr; + node->bytenr = root->node->start; rb_node = tree_insert(&rc->reloc_root_tree.rb_root, node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); @@ -1524,6 +1525,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, } if (reloc_root->commit_root != reloc_root->node) { + __update_reloc_root(reloc_root); btrfs_set_root_node(root_item, reloc_root->node); free_extent_buffer(reloc_root->commit_root); reloc_root->commit_root = btrfs_root_node(reloc_root); @@ -2480,7 +2482,21 @@ void merge_reloc_roots(struct reloc_control *rc) free_reloc_roots(&reloc_roots); } - BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); + /* + * We used to have + * + * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); + * + * here, but it's wrong. 
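The btrfs_wait_ordered_range() change above is a policy fix: an -EIO on one ordered extent is now recorded but no longer terminates the loop, so every remaining ordered extent in the range is still waited on before the error is returned. The shape, as a runnable sketch with wait_one() and NITEMS as hypothetical stand-ins:

#include <stdio.h>

#define NITEMS 4

static int wait_one(int i)
{
        return (i == 1) ? -5 /* -EIO */ : 0;
}

int main(void)
{
        int i, err, ret = 0;

        for (i = 0; i < NITEMS; i++) {
                err = wait_one(i);
                if (err && !ret)
                        ret = err;      /* latch the error, don't break */
        }
        printf("waited on all %d items, ret=%d\n", NITEMS, ret);
        return 0;
}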
If we fail to start the transaction in + * prepare_to_merge() we will have only 0 ref reloc roots, none of which + * have actually been removed from the reloc_root_tree rb tree. This is + * fine because we're bailing here, and we hold a reference on the root + * for the list that holds it, so these roots will be cleaned up when we + * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root + * will be cleaned up on unmount. + * + * The remaining nodes will be cleaned up by free_reloc_control. + */ } static void free_block_list(struct rb_root *blocks) @@ -4698,11 +4714,6 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, BUG_ON(rc->stage == UPDATE_DATA_PTRS && root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - if (buf == root->node) - __update_reloc_root(root, cow->start); - } - level = btrfs_header_level(buf); if (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item)) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3ab79fa00dc7bc02bfd6df57ef5f36488a21a657..17a8463ef35c1afb49d54b8c6fdf97a25b3e4899 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1801,6 +1801,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } if (btrfs_super_log_root(fs_info->super_copy) != 0) { + btrfs_warn(fs_info, + "mount required to replay tree-log, cannot remount read-write"); ret = -EINVAL; goto restore; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0b62c8080af06e5159774770536c6ef0cf8b2e61..bcfb7a772c8e5b5235ee074d41fca60a1d0520a2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4155,6 +4155,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; bool dropped_extents = false; + u64 truncate_offset = i_size; + struct extent_buffer *leaf; + int slot; int ins_nr = 0; int start_slot; int ret; @@ -4169,9 +4172,43 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (ret < 0) goto out; + /* + * We must check if there is a prealloc extent that starts before the + * i_size and crosses the i_size boundary. This is to ensure later we + * truncate down to the end of that extent and not to the i_size, as + * otherwise we end up losing part of the prealloc extent after a log + * replay and with an implicit hole if there is another prealloc extent + * that starts at an offset beyond i_size. 
+ */ + ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY); + if (ret < 0) + goto out; + + if (ret == 0) { + struct btrfs_file_extent_item *ei; + + leaf = path->nodes[0]; + slot = path->slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) == + BTRFS_FILE_EXTENT_PREALLOC) { + u64 extent_end; + + btrfs_item_key_to_cpu(leaf, &key, slot); + extent_end = key.offset + + btrfs_file_extent_num_bytes(leaf, ei); + + if (extent_end > i_size) + truncate_offset = extent_end; + } + } else { + ret = 0; + } + while (true) { - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; + leaf = path->nodes[0]; + slot = path->slots[0]; if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { @@ -4209,7 +4246,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, root->log_root, &inode->vfs_inode, - i_size, + truncate_offset, BTRFS_EXTENT_DATA_KEY); } while (ret == -EAGAIN); if (ret) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 358e930df4acd8f0d397c4e0a68db1dec0b20167..6d34842912e8048b0228ad811aaa509c322ff972 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7227,6 +7227,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, else btrfs_dev_stat_reset(dev, i); } + btrfs_info(fs_info, "device stats zeroed by %s (%d)", + current->comm, task_pid_nr(current)); } else { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) if (stats->nr_items > i) diff --git a/fs/buffer.c b/fs/buffer.c index 2ebd195996a319dfc0f0c6a9fdaa2ec7eef88978..758b6056ad912c4bbbda542310bc97473a440565 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1398,6 +1398,17 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__breadahead); +void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size, + gfp_t gfp) +{ + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); + if (likely(bh)) { + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + brelse(bh); + } +} +EXPORT_SYMBOL(__breadahead_gfp); + /** * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c3a3ee74e2d840758c470253efd5ae78c96853bb..1b5a50848b5be486ce9ad76b6508b1feb3e8d267 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1863,8 +1863,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, } /* want more caps from mds? 
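The new __breadahead_gfp() in fs/buffer.c above lets a caller choose the allocation mask for the readahead buffer, where plain __breadahead() goes through __getblk() and therefore __GFP_MOVABLE. A wrapper in the style of the sb_breadahead_unmovable() used by the ext4 hunks later in this series could plausibly look like the sketch below; the wrapper itself, and the assumption that __breadahead_gfp() gains a declaration in <linux/buffer_head.h>, are mine, not the patch's.

#include <linux/buffer_head.h>
#include <linux/fs.h>

static inline void demo_sb_breadahead_unmovable(struct super_block *sb,
                                                sector_t block)
{
        /*
         * gfp of 0: unlike plain __breadahead(), no __GFP_MOVABLE, so
         * the buffer page is not placed in a movable zone.
         */
        __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
}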
*/ - if (want & ~(cap->mds_wanted | cap->issued)) - goto ack; + if (want & ~cap->mds_wanted) { + if (want & ~(cap->mds_wanted | cap->issued)) + goto ack; + if (!__cap_is_valid(cap)) + goto ack; + } /* things we might delay */ if ((cap->issued & ~retain) == 0 && diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 3c59ad180ef0bb5b9dfb09528e5c1dc363f9b94e..4cfe1154d4c726f621b5a6f689f7e7b05ca52a39 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -151,6 +151,11 @@ static struct dentry *__get_parent(struct super_block *sb, req->r_num_caps = 1; err = ceph_mdsc_do_request(mdsc, NULL, req); + if (err) { + ceph_mdsc_put_request(req); + return ERR_PTR(err); + } + inode = req->r_target_inode; if (inode) ihold(inode); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b968334f841e81df35e2fe117035e5a6fbbc467a..f36ddfea4997e826307a368c54203adad11e0bb1 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2261,8 +2261,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { - err = -ENOENT; - pr_info("probably no mds server is up\n"); + err = -EHOSTUNREACH; goto finish; } } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 088c4488b4492cf705ba13f35a7196e8c80bfb0a..caa6780d8ff2f9e1b61e67e40ad3da3cabcecfd5 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -92,7 +92,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - static int ceph_sync_fs(struct super_block *sb, int wait) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); @@ -189,6 +188,26 @@ static match_table_t fsopt_tokens = { {-1, NULL} }; +/* + * Remove adjacent slashes and then the trailing slash, unless it is + * the only remaining character. + * + * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". + */ +static void canonicalize_path(char *path) +{ + int i, j = 0; + + for (i = 0; path[i] != '\0'; i++) { + if (path[i] != '/' || j < 1 || path[j - 1] != '/') + path[j++] = path[i]; + } + + if (j > 1 && path[j - 1] == '/') + j--; + path[j] = '\0'; +} + static int parse_fsopt_token(char *c, void *private) { struct ceph_mount_options *fsopt = private; @@ -390,12 +409,15 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); if (ret) return ret; + ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; + ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); if (ret) return ret; + ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); if (ret) return ret; @@ -451,13 +473,17 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, */ dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { - if (strlen(dev_name_end) > 1) { - fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); - if (!fsopt->server_path) { - err = -ENOMEM; - goto out; - } + /* + * The server_path will include the full path string from + * userland, including the leading '/'. + */ + fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); + if (!fsopt->server_path) { + err = -ENOMEM; + goto out; + } + + canonicalize_path(fsopt->server_path); } else { dev_name_end = dev_name + strlen(dev_name); } @@ -760,7 +786,6 @@ static void destroy_caches(void) ceph_fscache_unregister(); } - /* * ceph_umount_begin - initiate forced umount. Tear down the * mount, skipping steps that may hang while waiting for server(s).
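Since the canonicalize_path() added above is pure in-place string manipulation, it can be exercised as ordinary userspace C. A quick harness confirming the two examples documented in its comment:

#include <stdio.h>

static void canonicalize_path(char *path)
{
        int i, j = 0;

        for (i = 0; path[i] != '\0'; i++) {
                if (path[i] != '/' || j < 1 || path[j - 1] != '/')
                        path[j++] = path[i];
        }

        if (j > 1 && path[j - 1] == '/')
                j--;
        path[j] = '\0';
}

int main(void)
{
        char a[] = "//dir1////dir2///";
        char b[] = "///";

        canonicalize_path(a);
        canonicalize_path(b);
        printf("%s\n%s\n", a, b);       /* "/dir1/dir2" and "/" */
        return 0;
}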
@@ -845,9 +870,6 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, return root; } - - - /* * mount: join the ceph cluster, and open root directory. */ @@ -861,7 +883,9 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { - const char *path; + const char *path = fsc->mount_options->server_path ? + fsc->mount_options->server_path + 1 : ""; + err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; @@ -873,13 +897,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) goto out; } - if (!fsc->mount_options->server_path) { - path = ""; - dout("mount opening path \\t\n"); - } else { - path = fsc->mount_options->server_path + 1; - dout("mount opening path %s\n", path); - } + dout("mount opening path '%s'\n", path); err = ceph_fs_debugfs_init(fsc); if (err < 0) @@ -1055,6 +1073,11 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, return res; out_splat: + if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { + pr_info("No mds server is up or the cluster is laggy\n"); + err = -EHOSTUNREACH; + } + ceph_mdsc_close_sessions(fsc->mdsc); deactivate_locked_super(sb); goto out_final; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 46f600107cb5b327bb86e4d5b193c523f6bbfc3a..dd5257dee6cb040376185833e8929d338e356d55 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -85,7 +85,7 @@ struct ceph_mount_options { char *snapdir_name; /* default ".snap" */ char *mds_namespace; /* default NULL */ - char *server_path; /* default "/" */ + char *server_path; /* default NULL (means "/") */ char *fscache_uniq; /* default NULL */ }; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index b98436f5c7c74f9e773db0485bb9e80fcb43be99..73d428af97a9e974ad5450170dbbedfd3da48be5 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -603,7 +603,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) *pmode |= (S_IXUGO & (*pbits_to_set)); - cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode); + cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode); return; } @@ -632,7 +632,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, if (mode & S_IXUGO) *pace_flags |= SET_FILE_EXEC_RIGHTS; - cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n", + cifs_dbg(NOISY, "mode: %04o, access flags now 0x%x\n", mode, *pace_flags); return; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f0b1279a7de66716ecde8d3329784f5aba031127..58e7288e5151cedef80711f28f8339d36ddfd1c6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -348,8 +348,10 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server) return rc; } + spin_lock(&cifs_tcp_ses_lock); rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, strlen(ipaddr)); + spin_unlock(&cifs_tcp_ses_lock); kfree(ipaddr); return !rc ? 
-1 : 0; @@ -3047,8 +3049,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; - bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; + bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + old->prepath; + bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + new->prepath; if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; @@ -3519,7 +3523,7 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_gid = pvolume_info->linux_gid; cifs_sb->mnt_file_mode = pvolume_info->file_mode; cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; - cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n", + cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); cifs_sb->actimeo = pvolume_info->actimeo; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f00a7ce3eb6e4fa86f3c24bfeef291f7fbf17828..03293e543c075a2f41911b7f679b91178035af9f 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -562,7 +562,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); - fput(file); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5e75c5f77f4ceea9eb7025aec90d04d2f0a13f0c..662977b8d6aeada1da5b1ad126be437cd7354fac 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3303,7 +3303,7 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx) if (rc == -ENODATA) rc = 0; - ctx->rc = (rc == 0) ? ctx->total_len : rc; + ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc; mutex_unlock(&ctx->aio_mutex); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a35c1410590676bfff56b34575797ac8d6dc0b41..bdce714e9448950f4fff2deca667e929128762e1 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1581,7 +1581,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) struct TCP_Server_Info *server; char *full_path; - cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n", + cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n", mode, inode); cifs_sb = CIFS_SB(inode->i_sb); @@ -1998,6 +1998,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) struct inode *inode = d_inode(dentry); struct super_block *sb = dentry->d_sb; char *full_path = NULL; + int count = 0; if (inode == NULL) return -ENOENT; @@ -2019,15 +2020,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) full_path, inode, inode->i_count.counter, dentry, cifs_get_time(dentry), jiffies); +again: if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - + if (rc == -EAGAIN && count++ < 10) + goto again; out: kfree(full_path); free_xid(xid); + return rc; } diff --git a/fs/dax.c b/fs/dax.c index ddb4981ae32eb910c66c493dfa829151a4ef9f72..34a55754164f44a997236e59303ae28b07434d79 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1057,6 +1057,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, lockdep_assert_held(&inode->i_rwsem); } + if (iocb->ki_flags & IOCB_NOWAIT) + flags |= IOMAP_NOWAIT; + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 
bd25ab8370118cd40dd9e6e48aa689a3b3a02c9f..eed38ae86c6c1887ab32f8ff9b19cd658f057388 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -339,8 +339,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); + if (!crypt_stat || !crypt_stat->tfm + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + return -EINVAL; + if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index fa218cd64f746d2c924a786edc1c6052911c1732..b134315fb69d060974a252a1ccd2191a9be865c3 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1335,7 +1335,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_WARNING "Tag 1 packet contains key larger " "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); rc = -EINVAL; - goto out; + goto out_free; } memcpy((*new_auth_tok)->session_key.encrypted_key, &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2))); diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 4f457d5c49331cb7b813b5c9ca4257bd59ce5c3b..26464f9d9b769dc4250d40e923dd4f73e7ee6e74 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void) * ecryptfs_message_buf_len), GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { + kfree(ecryptfs_daemon_hash); rc = -ENOMEM; printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; diff --git a/fs/exec.c b/fs/exec.c index 81c438d94ae73d3a73ba5ef0ea76e71fee513aae..52ab168d4dcc5a37850347b974dcf1191f6d5104 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1373,7 +1373,7 @@ void setup_new_exec(struct linux_binprm * bprm) /* An exec changes our domain. We are no longer part of the thread group */ - current->self_exec_id++; + WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1); flush_signal_handlers(current, 0); } EXPORT_SYMBOL(setup_new_exec); diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index dd8f10db82e992da8b8f4fb37699d7bf4a87cf4d..bd1d68ff3a9f81ffecb3c4138cd646532ba3733c 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -56,6 +56,7 @@ #include #include +#include #include #include #include @@ -84,8 +85,8 @@ printk("\n"); \ } while (0) #else -# define ea_idebug(f...) -# define ea_bdebug(f...) +# define ea_idebug(inode, f...) no_printk(f) +# define ea_bdebug(bh, f...) 
no_printk(f) #endif static int ext2_xattr_set2(struct inode *, struct buffer_head *, @@ -838,8 +839,7 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1); if (error) { if (error == -EBUSY) { - ea_bdebug(bh, "already in cache (%d cache entries)", - atomic_read(&ext2_xattr_cache->c_entry_count)); + ea_bdebug(bh, "already in cache"); error = 0; } } else diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 70266a3355dc320ef141c4c2b225b5a3e9211003..fb38f20f869e71e2fbf460ca5dd7168f33e54ad8 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -280,6 +280,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh_p; if (block_group >= ngroups) { ext4_error(sb, "block_group >= groups_count - block_group = %u," @@ -290,7 +291,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); + /* + * sbi_array_rcu_deref returns with rcu unlocked, this is ok since + * the pointer being dereferenced won't be dereferenced again. By + * looking at the usage in add_new_gdb() the value isn't modified, + * just the pointer, and so it remains valid. + */ + if (!bh_p) { ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); @@ -298,10 +306,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + + (__u8 *)bh_p->b_data + offset * EXT4_DESC_SIZE(sb)); if (bh) - *bh = sbi->s_group_desc[group_desc]; + *bh = bh_p; return desc; } diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index bee888e0e2db3e7ae415ab9d7f67c45b6a29061a..13eb028607caf20d171314adae0a60618a52ae2f 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -137,6 +137,49 @@ static void debug_print_tree(struct ext4_sb_info *sbi) printk(KERN_CONT "\n"); } +static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) +{ + struct inode *inode; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_map_blocks map; + u32 i = 0, num; + int err = 0, n; + + if ((ino < EXT4_ROOT_INO) || + (ino > le32_to_cpu(sbi->s_es->s_inodes_count))) + return -EINVAL; + inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL); + if (IS_ERR(inode)) + return PTR_ERR(inode); + num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; + while (i < num) { + map.m_lblk = i; + map.m_len = num - i; + n = ext4_map_blocks(NULL, inode, &map, 0); + if (n < 0) { + err = n; + break; + } + if (n == 0) { + i++; + } else { + if (!ext4_data_block_valid(sbi, map.m_pblk, n)) { + ext4_error(sb, "blocks %llu-%llu from inode %u " + "overlap system zone", map.m_pblk, + map.m_pblk + map.m_len - 1, ino); + err = -EFSCORRUPTED; + break; + } + err = add_system_zone(sbi, map.m_pblk, n); + if (err < 0) + break; + i += n; + } + } + iput(inode); + return err; +} + int ext4_setup_system_zone(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); @@ -171,6 +214,12 @@ int ext4_setup_system_zone(struct super_block *sb) if (ret) return ret; } + if (ext4_has_feature_journal(sb) && 
sbi->s_es->s_journal_inum) { + ret = ext4_protect_reserved_inode(sb, + le32_to_cpu(sbi->s_es->s_journal_inum)); + if (ret) + return ret; + } if (test_opt(sb, DEBUG)) debug_print_tree(EXT4_SB(sb)); @@ -227,6 +276,11 @@ int ext4_check_blockref(const char *function, unsigned int line, __le32 *bref = p; unsigned int blk; + if (ext4_has_feature_journal(inode->i_sb) && + (inode->i_ino == + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + return 0; + while (bref < p+max) { blk = le32_to_cpu(*bref++); if (blk && diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1014369e535b070b98f1387f4dad699e1a94e6a4..e4d13c6ac9313b5c08e4cf7a86a0c2ff2e073d74 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -126,12 +126,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (err != ERR_BAD_DX_DIR) { return err; } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - ext4_clear_inode_flag(file_inode(file), - EXT4_INODE_INDEX); + /* Can we just clear INDEX flag to ignore htree information? */ + if (!ext4_has_metadata_csum(sb)) { + /* + * We don't set the inode dirty flag since it's not + * critical that it gets flushed back to the disk. + */ + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } if (ext4_has_inline_data(inode)) { @@ -664,49 +666,8 @@ const struct file_operations ext4_dir_operations = { }; #ifdef CONFIG_UNICODE -static int ext4_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - struct inode *inode = dentry->d_parent->d_inode; - - if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } - - return ext4_ci_compare(inode, name, &qstr, false); -} - -static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) -{ - const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - unsigned char *norm; - int len, ret = 0; - - if (!IS_CASEFOLDED(dentry->d_inode) || !um) - return 0; - - norm = kmalloc(PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (ext4_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kfree(norm); - return ret; -} - const struct dentry_operations ext4_dentry_ops = { - .d_hash = ext4_d_hash, - .d_compare = ext4_d_compare, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, }; #endif diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3bab4b55fa3ccc3a5a35bd0141b4f27dc99326da..1578a86784a67606ac79f5b43cd52ed86b724f69 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1378,14 +1378,6 @@ struct ext4_super_block { #define EXT4_ENC_UTF8_12_1 1 -/* - * Flags for ext4_sb_info.s_encoding_flags. 
- */ -#define EXT4_ENC_STRICT_MODE_FL (1 << 0) - -#define ext4_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL) - /* * fourth extended-fs super-block data in memory */ @@ -1406,7 +1398,7 @@ struct ext4_sb_info { loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; @@ -1437,10 +1429,6 @@ struct ext4_sb_info { struct kobject s_kobj; struct completion s_kobj_unregister; struct super_block *s_sb; -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif /* Journaling */ struct journal_s *s_journal; @@ -1470,7 +1458,7 @@ struct ext4_sb_info { #endif /* for buddy allocator */ - struct ext4_group_info ***s_group_info; + struct ext4_group_info ** __rcu *s_group_info; struct inode *s_buddy_cache; spinlock_t s_md_lock; unsigned short *s_mb_offsets; @@ -1520,7 +1508,7 @@ struct ext4_sb_info { unsigned int s_extent_max_zeroout_kb; unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; + struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; /* workqueue for reserved extent conversions (buffered io) */ @@ -1560,8 +1548,11 @@ struct ext4_sb_info { struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - /* Barrier between changing inodes' journal flags and writepages ops. */ - struct percpu_rw_semaphore s_journal_flag_rwsem; + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA + * or EXTENTS flag. + */ + struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; }; @@ -1581,6 +1572,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +/* + * Returns: sbi->field[index] + * Used to access an array element from the following sbi fields which require + * rcu protection to avoid dereferencing an invalid pointer due to reassignment + * - s_group_desc + * - s_group_info + * - s_flex_group + */ +#define sbi_array_rcu_deref(sbi, field, index) \ +({ \ + typeof(*((sbi)->field)) _v; \ + rcu_read_lock(); \ + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ + rcu_read_unlock(); \ + _v; \ +}) + /* * Inode dynamic state flags */ @@ -2486,8 +2494,11 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) + if (!ext4_has_feature_dir_index(inode->i_sb)) { + /* ext4_iget() should have caught this... 
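The fs/ext4/ext4.h changes above annotate s_group_desc, s_group_info and s_flex_groups as __rcu pointers and add the sbi_array_rcu_deref() helper. The underlying pattern is "grow an array by publishing a copy under RCU": readers dereference the array pointer inside an RCU read section, the (serialized) grower installs the new array with rcu_assign_pointer(), and the old array is freed only after a grace period. A kernel-context sketch; the demo_* names are hypothetical, and ext4's real resize path uses the ext4_kvfree_array_rcu() declared in a later hunk instead of a synchronous synchronize_rcu().

#include <linux/errno.h>
#include <linux/mm.h>           /* kvmalloc_array(), kvfree() */
#include <linux/rcupdate.h>
#include <linux/string.h>

struct item { int v; };

struct demo_sb {
        struct item * __rcu *arr;
        unsigned int nr;
};

/* Reader side: the shape sbi_array_rcu_deref() packages up. */
static struct item *demo_read(struct demo_sb *s, unsigned int i)
{
        struct item *it;

        rcu_read_lock();
        it = rcu_dereference(s->arr)[i];
        rcu_read_unlock();
        /* Safe after unlock: the element pointers themselves never move. */
        return it;
}

/* Grower side; callers are assumed to serialize against each other. */
static int demo_grow(struct demo_sb *s, unsigned int new_nr)
{
        struct item **old_arr, **new_arr;

        new_arr = kvmalloc_array(new_nr, sizeof(*new_arr),
                                 GFP_KERNEL | __GFP_ZERO);
        if (!new_arr)
                return -ENOMEM;

        old_arr = rcu_dereference_protected(s->arr, 1); /* single grower */
        if (old_arr)
                memcpy(new_arr, old_arr, s->nr * sizeof(*new_arr));
        rcu_assign_pointer(s->arr, new_arr);
        s->nr = new_nr;
        if (old_arr) {
                synchronize_rcu();      /* ext4 defers this instead */
                kvfree(old_arr);
        }
        return 0;
}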
*/ + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK @@ -2586,8 +2597,19 @@ int do_journal_get_write_access(handle_t *handle, #define FALL_BACK_TO_NONDELALLOC 1 #define CONVERT_INLINE_DATA 2 -extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); +typedef enum { + EXT4_IGET_NORMAL = 0, + EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */ + EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */ +} ext4_iget_flags; + +extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, + ext4_iget_flags flags, const char *function, + unsigned int line); + +#define ext4_iget(sb, ino, flags) \ + __ext4_iget((sb), (ino), (flags), __func__, __LINE__) + extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int); @@ -2663,6 +2685,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ +extern void ext4_kvfree_array_rcu(void *to_free); extern int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input); extern int ext4_group_extend(struct super_block *sb, @@ -2908,13 +2931,13 @@ static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info ***grp_info; + struct ext4_group_info **grp_info; long indexv, indexh; BUG_ON(group >= EXT4_SB(sb)->s_groups_count); - grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* @@ -2964,7 +2987,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) !inode_is_locked(inode)); down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); up_write(&EXT4_I(inode)->i_data_sem); } @@ -3205,7 +3228,7 @@ static inline void ext4_set_de_type(struct super_block *sb, /* readpages.c */ extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages); + unsigned nr_pages, bool is_readahead); extern int __init ext4_init_post_read_processing(void); extern void ext4_exit_post_read_processing(void); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ceddff15e548f7415ef02dd4a02b42c4e1af567a..2824b5c6518744783abb9181de31dc3633161b95 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -510,6 +510,30 @@ int ext4_ext_check_inode(struct inode *inode) return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); } +static void ext4_cache_extents(struct inode *inode, + struct ext4_extent_header *eh) +{ + struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); + ext4_lblk_t prev = 0; + int i; + + for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { + unsigned int status = EXTENT_STATUS_WRITTEN; + ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); + int len = ext4_ext_get_actual_len(ex); + + if (prev && (prev != lblk)) + ext4_es_cache_extent(inode, prev, lblk - prev, ~0, + 
EXTENT_STATUS_HOLE); + + if (ext4_ext_is_unwritten(ex)) + status = EXTENT_STATUS_UNWRITTEN; + ext4_es_cache_extent(inode, lblk, len, + ext4_ext_pblock(ex), status); + prev = lblk + len; + } +} + static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, struct inode *inode, ext4_fsblk_t pblk, int depth, @@ -530,36 +554,21 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; + if (!ext4_has_feature_journal(inode->i_sb) || + (inode->i_ino != + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; + } set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries */ if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { struct ext4_extent_header *eh = ext_block_hdr(bh); - struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); - ext4_lblk_t prev = 0; - int i; - - for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { - unsigned int status = EXTENT_STATUS_WRITTEN; - ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); - int len = ext4_ext_get_actual_len(ex); - - if (prev && (prev != lblk)) - ext4_es_cache_extent(inode, prev, - lblk - prev, ~0, - EXTENT_STATUS_HOLE); - - if (ext4_ext_is_unwritten(ex)) - status = EXTENT_STATUS_UNWRITTEN; - ext4_es_cache_extent(inode, lblk, len, - ext4_ext_pblock(ex), status); - prev = lblk + len; - } + ext4_cache_extents(inode, eh); } return bh; errout: @@ -907,6 +916,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[0].p_bh = NULL; i = depth; + if (!(flags & EXT4_EX_NOCACHE) && depth == 0) + ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) { ext_debug("depth %d: num %d, max %d\n", @@ -3446,8 +3457,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map_len) eof_block = map->m_lblk + map_len; @@ -3702,8 +3713,8 @@ static int ext4_split_convert_extents(handle_t *handle, __func__, inode->i_ino, (unsigned long long)map->m_lblk, map->m_len); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; /* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index e6c864668d0775d1f17e8fedc4bd8925e9f547bb..32a0f04d8517fd899b881d00045c5583cd5198ea 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -38,9 +38,10 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock_shared(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) return -EAGAIN; + } else { inode_lock_shared(inode); } /* @@ -188,9 +189,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if 
(iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) return -EAGAIN; + } else { inode_lock(inode); } ret = ext4_write_checks(iocb, from); diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index ed76a6d7a2d89c75a4c5a193ffe9c7c6d910aeef..f3bc69b8d4e5351c5caad61d6f3ece89a8899258 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -277,7 +277,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { #ifdef CONFIG_UNICODE - const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr qstr = {.name = name, .len = len }; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 6f9355227dc81ce62dedc771ddb220bbf382bd32..b9d4cb194462e50b41e683ac0db74232ba2f8092 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -333,11 +333,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_inc(&sbi->s_freeinodes_counter); if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, block_group); + struct flex_groups *fg; - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, + ext4_flex_group(sbi, block_group)); + atomic_inc(&fg->free_inodes); if (is_directory) - atomic_dec(&sbi->s_flex_groups[f].used_dirs); + atomic_dec(&fg->used_dirs); } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); @@ -378,12 +380,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, int flex_size, struct orlov_stats *stats) { struct ext4_group_desc *desc; - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; if (flex_size > 1) { - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), + s_flex_groups, g); + stats->free_inodes = atomic_read(&fg->free_inodes); + stats->free_clusters = atomic64_read(&fg->free_clusters); + stats->used_dirs = atomic_read(&fg->used_dirs); return; } @@ -670,7 +673,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * block has been written back to disk. (Yes, these values are * somewhat arbitrary...) 
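Both fs/ext4/file.c hunks above converge on the same locking shape, shown here in isolation as a kernel-context sketch (the function name is hypothetical): a non-blocking IOCB_NOWAIT caller only ever trylocks and bails with -EAGAIN, while a blocking caller now takes the lock directly rather than attempting a trylock first and falling back.

#include <linux/fs.h>

static ssize_t demo_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct inode *inode = file_inode(iocb->ki_filp);

        if (iocb->ki_flags & IOCB_NOWAIT) {
                if (!inode_trylock(inode))
                        return -EAGAIN; /* caller asked not to sleep */
        } else {
                inode_lock(inode);
        }

        /* ... do the write under the lock ... */

        inode_unlock(inode);
        return 0;
}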
*/ -#define RECENTCY_MIN 5 +#define RECENTCY_MIN 60 #define RECENTCY_DIRTY 300 static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) @@ -1062,7 +1065,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].used_dirs); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); } } if (ext4_has_group_desc_csum(sb)) { @@ -1085,7 +1089,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_inodes); } inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); @@ -1234,7 +1239,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) if (!ext4_test_bit(bit, bitmap_bh->b_data)) goto bad_orphan; - inode = ext4_iget(sb, ino); + inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { err = PTR_ERR(inode); ext4_error(sb, "couldn't read orphan inode %lu (err %d)", diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 48660fbb21a9a6dd14685de67f04efa6eca25582..2dd1114d5f6c86426cb8570c5bf140d148132f9b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -408,6 +408,10 @@ static int __check_block_validity(struct inode *inode, const char *func, unsigned int line, struct ext4_map_blocks *map) { + if (ext4_has_feature_journal(inode->i_sb) && + (inode->i_ino == + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + return 0; if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, @@ -2147,7 +2151,7 @@ static int ext4_writepage(struct page *page, bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - ext4_invalidatepage(page, 0, PAGE_SIZE); + inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return -EIO; } @@ -2593,7 +2597,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, * truncate are avoided by checking i_size under i_data_sem. 
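The comment above ends the preamble to a READ_ONCE() check of i_disksize in the next hunk; paired with the WRITE_ONCE() added to ext4_update_i_disksize() earlier in this patch, it annotates an intentional lockless read whose result is re-validated under i_data_sem. A distilled kernel-context sketch of the pairing, with illustrative type and function names:

#include <linux/compiler.h>
#include <linux/rwsem.h>
#include <linux/types.h>

struct demo_inode {
        struct rw_semaphore sem;
        loff_t disksize;
};

static void demo_update_disksize(struct demo_inode *di, loff_t newsize)
{
        down_write(&di->sem);
        if (newsize > di->disksize)
                WRITE_ONCE(di->disksize, newsize);      /* no store tearing */
        up_write(&di->sem);
}

static bool demo_needs_update(struct demo_inode *di, loff_t disksize)
{
        /* Racy but safe fast-path check; confirmed under the lock later. */
        return disksize > READ_ONCE(di->disksize);
}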
*/ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; - if (disksize > EXT4_I(inode)->i_disksize) { + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; @@ -2763,7 +2767,7 @@ static int ext4_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); if (dax_mapping(mapping)) { @@ -2993,7 +2997,7 @@ static int ext4_writepages(struct address_space *mapping, out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -3378,7 +3382,8 @@ static int ext4_readpage(struct file *file, struct page *page) ret = ext4_readpage_inline(inode, page); if (ret == -EAGAIN) - return ext4_mpage_readpages(page->mapping, NULL, page, 1); + return ext4_mpage_readpages(page->mapping, NULL, page, 1, + false); return ret; } @@ -3393,7 +3398,7 @@ ext4_readpages(struct file *file, struct address_space *mapping, if (ext4_has_inline_data(inode)) return 0; - return ext4_mpage_readpages(mapping, pages, NULL, nr_pages); + return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static void ext4_invalidatepage(struct page *page, unsigned int offset, @@ -4672,7 +4677,7 @@ static int __ext4_get_inode_loc(struct inode *inode, if (end > table) end = table; while (b <= end) - sb_breadahead(sb, b++); + sb_breadahead_unmovable(sb, b++); } /* @@ -4781,7 +4786,9 @@ int ext4_get_projid(struct inode *inode, kprojid_t *projid) return 0; } -struct inode *ext4_iget(struct super_block *sb, unsigned long ino) +struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, + ext4_iget_flags flags, const char *function, + unsigned int line) { struct ext4_iloc iloc; struct ext4_inode *raw_inode; @@ -4795,6 +4802,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) gid_t i_gid; projid_t i_projid; + if ((!(flags & EXT4_IGET_SPECIAL) && + (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) || + (ino < EXT4_ROOT_INO) || + (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { + if (flags & EXT4_IGET_HANDLE) + return ERR_PTR(-ESTALE); + __ext4_error(sb, function, line, + "inode #%lu: comm %s: iget: illegal inode #", + ino, current->comm); + return ERR_PTR(-EFSCORRUPTED); + } + inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); @@ -4810,18 +4829,26 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) raw_inode = ext4_raw_inode(&iloc); if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { - EXT4_ERROR_INODE(inode, "root inode unallocated"); + ext4_error_inode(inode, function, line, 0, + "iget: root inode unallocated"); ret = -EFSCORRUPTED; goto bad_inode; } + if ((flags & EXT4_IGET_HANDLE) && + (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) { + ret = -ESTALE; + goto bad_inode; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb) || (ei->i_extra_isize & 3)) { - EXT4_ERROR_INODE(inode, - "bad extra_isize %u (inode size %u)", + ext4_error_inode(inode, function, line, 0, + "iget: bad extra_isize %u " + "(inode size %u)", ei->i_extra_isize, EXT4_INODE_SIZE(inode->i_sb)); ret = -EFSCORRUPTED; @@ -4843,7 +4870,8 @@ struct inode *ext4_iget(struct super_block 
*sb, unsigned long ino) } if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { - EXT4_ERROR_INODE(inode, "checksum invalid"); + ext4_error_inode(inode, function, line, 0, + "iget: checksum invalid"); ret = -EFSBADCRC; goto bad_inode; } @@ -4900,7 +4928,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(sb, raw_inode); if ((size = i_size_read(inode)) < 0) { - EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ext4_error_inode(inode, function, line, 0, + "iget: bad i_size value: %lld", size); + ret = -EFSCORRUPTED; + goto bad_inode; + } + /* + * If dir_index is not enabled but there's dir with INDEX flag set, + * we'd normally treat htree data as empty space. But with metadata + * checksumming that corrupts checksums so forbid that. + */ + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { + EXT4_ERROR_INODE(inode, + "iget: Dir with htree data on filesystem without dir_index feature."); ret = -EFSCORRUPTED; goto bad_inode; } @@ -4974,7 +5015,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { - EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", + ext4_error_inode(inode, function, line, 0, + "iget: bad extended attribute block %llu", ei->i_file_acl); ret = -EFSCORRUPTED; goto bad_inode; @@ -5029,7 +5071,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) make_bad_inode(inode); } else { ret = -EFSCORRUPTED; - EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); + ext4_error_inode(inode, function, line, 0, + "iget: bogus i_mode (%o)", inode->i_mode); goto bad_inode; } if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) @@ -5046,19 +5089,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return ERR_PTR(ret); } -struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) -{ - if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-EFSCORRUPTED); - return ext4_iget(sb, ino); -} - static int ext4_inode_blocks_set(handle_t *handle, struct ext4_inode *raw_inode, struct ext4_inode_info *ei) { struct inode *inode = &(ei->vfs_inode); - u64 i_blocks = inode->i_blocks; + u64 i_blocks = READ_ONCE(inode->i_blocks); struct super_block *sb = inode->i_sb; if (i_blocks <= ~0U) { @@ -6130,7 +6166,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } - percpu_down_write(&sbi->s_journal_flag_rwsem); + percpu_down_write(&sbi->s_writepages_rwsem); jbd2_journal_lock_updates(journal); /* @@ -6147,7 +6183,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); ext4_inode_resume_unlocked_dio(inode); return err; } @@ -6156,7 +6192,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); if (val) up_write(&EXT4_I(inode)->i_mmap_sem); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f852d90a15629b2dfe061c7ff3978cd667481596..e7c7a6737a4667765c6764f35ce8fe3723a0cc13 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -111,7 +111,7 @@ static long 
swap_inode_boot_loader(struct super_block *sb, if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) return -EPERM; - inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); + inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL); if (IS_ERR(inode_bl)) return PTR_ERR(inode_bl); ei_bl = EXT4_I(inode_bl); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3ba9a4ae4eacdd079684386658884e196ca7daad..d7cedfaa1cc08b7ef239b67e268c8634345caebb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1952,7 +1952,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, int free; free = e4b->bd_info->bb_free; - BUG_ON(free <= 0); + if (WARN_ON(free <= 0)) + return; i = e4b->bd_info->bb_first_free; @@ -1973,7 +1974,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, } mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); - BUG_ON(ex.fe_len <= 0); + if (WARN_ON(ex.fe_len <= 0)) + break; if (free < ex.fe_len) { ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " @@ -2389,7 +2391,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; - struct ext4_group_info ***new_groupinfo; + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); @@ -2402,13 +2404,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } - if (sbi->s_group_info) { - memcpy(new_groupinfo, sbi->s_group_info, + rcu_read_lock(); + old_groupinfo = rcu_dereference(sbi->s_group_info); + if (old_groupinfo) + memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); - kvfree(sbi->s_group_info); - } - sbi->s_group_info = new_groupinfo; + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + if (old_groupinfo) + ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; @@ -2420,6 +2425,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, { int i; int metalen = 0; + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2438,12 +2444,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, "for a buddy group"); goto exit_meta_group_info; } - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = - meta_group_info; + rcu_read_lock(); + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; + rcu_read_unlock(); } - meta_group_info = - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); @@ -2491,8 +2497,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + struct ext4_group_info ***group_info; + + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); + 
kfree(group_info[idx]); + group_info[idx] = NULL; + rcu_read_unlock(); } exit_meta_group_info: return -ENOMEM; @@ -2505,6 +2516,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; + struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); @@ -2539,11 +2551,16 @@ static int ext4_mb_init_backend(struct super_block *sb) while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) - kfree(sbi->s_group_info[i]); + kfree(group_info[i]); + rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - kvfree(sbi->s_group_info); + rcu_read_lock(); + kvfree(rcu_dereference(sbi->s_group_info)); + rcu_read_unlock(); return -ENOMEM; } @@ -2733,7 +2750,7 @@ int ext4_mb_release(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; - struct ext4_group_info *grinfo; + struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2751,9 +2768,12 @@ int ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) - kfree(sbi->s_group_info[i]); - kvfree(sbi->s_group_info); + kfree(group_info[i]); + kvfree(group_info); + rcu_read_unlock(); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); @@ -3052,7 +3072,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4947,7 +4968,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) @@ -5092,7 +5114,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 78d45c7d3fa7382d3374916ddbcee4739504dffa..0d785868cc503e82042294eec95462f673143890 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) int ext4_ext_migrate(struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; int retval = 0, i; __le32 *i_data; @@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode) */ return retval; + percpu_down_write(&sbi->s_writepages_rwsem); + /* * Worst case we can touch the allocation bitmaps, a bgd * block, and a block to link in the orphan list. 
We do need @@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(handle)) { retval = PTR_ERR(handle); - return retval; + goto out_unlock; } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; @@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); - return retval; + goto out_unlock; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode) */ ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); - goto out; + goto out_tmp_inode; } ei = EXT4_I(inode); @@ -602,10 +605,11 @@ int ext4_ext_migrate(struct inode *inode) /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); ext4_journal_stop(handle); -out: +out_tmp_inode: unlock_new_inode(tmp_inode); iput(tmp_inode); - +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return retval; } @@ -615,7 +619,8 @@ int ext4_ext_migrate(struct inode *inode) int ext4_ind_migrate(struct inode *inode) { struct ext4_extent_header *eh; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; @@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode) if (test_opt(inode->i_sb, DELALLOC)) ext4_alloc_da_blocks(inode); + percpu_down_write(&sbi->s_writepages_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock; + } down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); @@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode) errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return ret; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 38e6a846aac16a463249dddba9d0cd7eb36ccff6..0c042bd43246e43992b35ec4153bcbbdf71253db 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -120,10 +120,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, { __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, - "MMP failure info: last update time: %llu, last update " - "node: %s, last update device: %s", - (long long unsigned int) le64_to_cpu(mmp->mmp_time), - mmp->mmp_nodename, mmp->mmp_bdevname); + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", + (unsigned long long)le64_to_cpu(mmp->mmp_time), + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); } /* @@ -154,6 +154,7 @@ static int kmmpd(void *data) mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); bdevname(bh->b_bdev, mmp->mmp_bdevname); memcpy(mmp->mmp_nodename, init_utsname()->nodename, @@ -375,7 +376,8 @@ int ext4_multi_mount_protect(struct super_block *sb, /* * Start a kernel thread to update the MMP block periodically. 
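
The __dump_mmp_msg() hunk above moves to "%.*s" because mmp_nodename and mmp_bdevname are fixed-size on-disk buffers that are not guaranteed to be NUL-terminated; printing them with a plain "%s" can run past the end of the structure. A minimal sketch of the bounded form, with an illustrative field name and size:

	char node[8];				/* hypothetical fixed-size field */

	memcpy(node, "12345678", sizeof(node));	/* fills it, no terminator */
	pr_warn("node: %.*s\n", (int)sizeof(node), node); /* stops at 8 bytes */

The (int) casts matter: the "*" precision in printf-style formats is consumed as an int, while sizeof() yields a size_t.
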
*/ - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s", + (int)sizeof(mmp->mmp_bdevname), bdevname(bh->b_bdev, mmp->mmp_bdevname)); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 42177a426ca901388ef206f506262748fccbc0f5..0e6a7cb9e9cf0faa9ae55448faf4b3971c1f2a1b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1280,8 +1280,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) int ext4_ci_compare(const struct inode *parent, const struct qstr *name, const struct qstr *entry, bool quick) { - const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; int ret; if (quick) @@ -1293,7 +1293,7 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, /* Handle invalid character sequence as either an error * or as an opaque byte sequence. */ - if (ext4_has_strict_mode(sbi)) + if (sb_has_enc_strict_mode(sb)) return -EINVAL; if (name->len != entry->len) @@ -1310,7 +1310,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, { int len; - if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) { + if (!needs_casefold(dir)) { cf_name->name = NULL; return; } @@ -1319,7 +1319,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, if (!cf_name->name) return; - len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, EXT4_NAME_LEN); if (len <= 0) { @@ -1356,7 +1356,7 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (needs_casefold(parent)) { if (fname->cf_name.name) { struct qstr cf = {.name = fname->cf_name.name, .len = fname->cf_name.len}; @@ -1505,6 +1505,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, /* * We deal with the read-ahead logic here. 
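
The namei.c hunks here and below fold the open-coded "IS_CASEFOLDED(dir) && s_encoding" tests into helpers defined outside this patch. Their assumed shapes, for reference (both are reconstructions, not copied from the tree):

static inline bool sb_has_enc_strict_mode(const struct super_block *sb)
{
	return sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL;
}

static inline bool needs_casefold(const struct inode *dir)
{
	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
}
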
*/ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; @@ -1696,7 +1697,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi dentry); return ERR_PTR(-EFSCORRUPTED); } - inode = ext4_iget_normal(dir->i_sb, ino); + inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", @@ -1749,7 +1750,7 @@ struct dentry *ext4_get_parent(struct dentry *child) return ERR_PTR(-EFSCORRUPTED); } - return d_obtain_alias(ext4_iget_normal(child->d_sb, ino)); + return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL)); } /* @@ -2172,7 +2173,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct ext4_dir_entry_2 *de; struct ext4_dir_entry_tail *t; struct super_block *sb; - struct ext4_sb_info *sbi; struct ext4_filename fname; int retval; int dx_fallback=0; @@ -2184,14 +2184,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; - sbi = EXT4_SB(sb); blocksize = sb->s_blocksize; if (!dentry->d_name.len) return -EINVAL; #ifdef CONFIG_UNICODE - if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) && - sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name)) + if (sb_has_enc_strict_mode(sb) && IS_CASEFOLDED(dir) && + sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; #endif @@ -2213,6 +2212,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index a98156dca3df3b8c42e2d6e898f0d0e2d3126f91..49806a7bcecf83264f2c758e1542148b7829479a 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -250,7 +250,7 @@ ext4_submit_bio_read(struct bio *bio) int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4f7cd78d0364730d268a608f47142c039976ecfd..19af346a66514d51ff05d9d1659d4bf700fb02db 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -17,6 +17,33 @@ #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -545,8 +572,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = 
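
ext4_kvfree_array_rcu(), added above, gives writers a fire-and-forget way to retire an array; if the tracking allocation fails it falls back to a blocking synchronize_rcu(). The contract is that the caller publishes the replacement first, so concurrent readers never see freed memory. Write-side ordering, sketched against the s_group_desc callers in this file:

	rcu_read_lock();
	old = rcu_dereference(sbi->s_group_desc);
	memcpy(new, old, count * sizeof(*new));		/* copy existing slots */
	rcu_read_unlock();

	new[gdb_num] = gdb_bh;				/* fill the new slot */
	rcu_assign_pointer(sbi->s_group_desc, new);	/* publish */
	ext4_kvfree_array_rcu(old);			/* reclaim after readers drain */
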
ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -854,13 +881,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_super(handle, sb); @@ -904,9 +933,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; BUFFER_TRACE(gdb_bh, "get_write_access"); @@ -917,9 +948,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -1183,7 +1214,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); @@ -1265,7 +1297,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). 
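
Single-element reads in these hunks go through sbi_array_rcu_deref(), which is added to ext4.h elsewhere in this series. A hedged reconstruction of the macro (the real definition may differ in detail):

#define sbi_array_rcu_deref(sbi, field, index)				\
({									\
	typeof(*((sbi)->field)) _v;					\
	rcu_read_lock();						\
	_v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index];	\
	rcu_read_unlock();						\
	_v;								\
})

It copies the element out under rcu_read_lock(); that is safe here because the array elements themselves (buffer heads, flex_groups) live as long as the superblock does, and only the pointer table is ever replaced.
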
*/ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1393,11 +1425,14 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* @@ -1492,7 +1527,8 @@ static int ext4_flex_group_add(struct super_block *sb, for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, @@ -1616,7 +1652,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = ext4_iget(sb, EXT4_RESIZE_INO); + inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL); if (IS_ERR(inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(inode); @@ -1944,7 +1980,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) } if (!resize_inode) - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO, + EXT4_IGET_SPECIAL); if (IS_ERR(resize_inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(resize_inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e889ae6bd23953383291095856df347850103a32..c0e405bdab46086aacbcd2e0dd137505eaf74113 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -371,7 +371,8 @@ static void save_error_info(struct super_block *sb, const char *func, unsigned int line) { __save_error_info(sb, func, line); - ext4_commit_super(sb, 1); + if (!bdev_read_only(sb->s_bdev)) + ext4_commit_super(sb, 1); } /* @@ -902,6 +903,8 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; + struct flex_groups **flex_groups; int aborted = 0; int i, err; @@ -933,15 +936,23 @@ static void ext4_put_super(struct super_block *sb) if (!sb_rdonly(sb)) ext4_commit_super(sb, 1); + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); - kvfree(sbi->s_flex_groups); + brelse(group_desc[i]); + kvfree(group_desc); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(get_qf_name(sb, sbi, i)); @@ -990,7 +1001,7 @@ 
static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kfree(sbi); } @@ -1125,20 +1136,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, { struct inode *inode; - if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-ESTALE); - if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) - return ERR_PTR(-ESTALE); - - /* iget isn't really right if the inode is currently unallocated!! - * - * ext4_read_inode will return a bad_inode if the inode had been - * deleted, so we should be safe. - * + /* * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = ext4_iget_normal(sb, ino); + inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE); if (IS_ERR(inode)) return ERR_CAST(inode); if (generation && inode->i_generation != generation) { @@ -2275,8 +2277,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups *new_groups; - int size; + struct flex_groups **old_groups, **new_groups; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2285,22 +2287,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", - size / (int) sizeof(struct flex_groups)); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex group pointers", size); return -ENOMEM; } - - if (sbi->s_flex_groups) { - memcpy(new_groups, sbi->s_flex_groups, - (sbi->s_flex_groups_allocated * - sizeof(struct flex_groups))); - kvfree(sbi->s_flex_groups); + for (i = sbi->s_flex_groups_allocated; i < size; i++) { + new_groups[i] = kvzalloc(roundup_pow_of_two( + sizeof(struct flex_groups)), + GFP_KERNEL); + if (!new_groups[i]) { + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); + kvfree(new_groups); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex groups", size); + return -ENOMEM; + } } - sbi->s_flex_groups = new_groups; - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + rcu_read_lock(); + old_groups = rcu_dereference(sbi->s_flex_groups); + if (old_groups) + memcpy(new_groups, old_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups *))); + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_flex_groups, new_groups); + sbi->s_flex_groups_allocated = size; + if (old_groups) + ext4_kvfree_array_rcu(old_groups); return 0; } @@ -2308,6 +2325,7 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; + struct flex_groups *fg; ext4_group_t flex_group; int i, err; @@ -2325,12 +2343,11 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_add(ext4_free_inodes_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); 
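
ext4_alloc_flex_bg_array() above also changes the data layout: s_flex_groups becomes a table of pointers to individually allocated struct flex_groups, so growing the filesystem only reallocates the small pointer table, never the counter structures themselves. The reader side reduces to the pattern used in ext4_fill_flex_info() below; nr_inodes here is illustrative:

	struct flex_groups *fg;

	fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
	atomic_add(nr_inodes, &fg->free_inodes);	/* element is stable */
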
atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(ext4_used_dirs_count(sb, gdp), - &sbi->s_flex_groups[flex_group].used_dirs); + &fg->free_clusters); + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); } return 1; @@ -2920,17 +2937,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_QUOTA - if (ext4_has_feature_quota(sb) && !readonly) { - ext4_msg(sb, KERN_ERR, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return 0; - } - if (ext4_has_feature_project(sb) && !readonly) { +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) + if (!readonly && (ext4_has_feature_quota(sb) || + ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2"); return 0; } #endif /* CONFIG_QUOTA */ @@ -3502,7 +3513,8 @@ int ext4_calculate_overhead(struct super_block *sb) */ if (sbi->s_journal && !sbi->journal_bdev) overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); - else if (ext4_has_feature_journal(sb) && !sbi->s_journal) { + else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { + /* j_inum for internal journal is non-zero */ j_inode = ext4_get_journal_inode(sb, j_inum); if (j_inode) { j_blocks = j_inode->i_size >> sb->s_blocksize_bits; @@ -3552,9 +3564,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, GFP_KERNEL); - struct buffer_head *bh; + struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct flex_groups **flex_groups; ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -3725,6 +3738,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + if (blocksize < EXT4_MIN_BLOCK_SIZE || + blocksize > EXT4_MAX_BLOCK_SIZE) { + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; @@ -3742,6 +3764,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "unsupported inode size: %d", sbi->s_inode_size); + ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); goto failed_mount; } /* @@ -3804,7 +3827,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; #ifdef CONFIG_UNICODE - if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) { + if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3835,8 +3858,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sb->s_encoding = encoding; + sb->s_encoding_flags = encoding_flags; } #endif @@ -3942,14 +3965,6 @@ static int 
ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { - ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } if (le32_to_cpu(es->s_log_block_size) > (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, @@ -4041,7 +4056,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", - sbi->s_blocks_per_group); + sbi->s_inodes_per_group); goto failed_mount; } sbi->s_itb_per_group = sbi->s_inodes_per_group / @@ -4172,9 +4187,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_BLOCKS_PER_GROUP(sb) - 1); do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - ext4_msg(sb, KERN_WARNING, "groups count too large: %u " + ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " "(block count %llu, first data block %u, " - "blocks per group %lu)", sbi->s_groups_count, + "blocks per group %lu)", blocks_count, ext4_blocks_count(es), le32_to_cpu(es->s_first_data_block), EXT4_BLOCKS_PER_GROUP(sb)); @@ -4202,9 +4217,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = kvmalloc(db_count * + rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, sizeof(struct buffer_head *), - GFP_KERNEL); + GFP_KERNEL)); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); ret = -ENOMEM; @@ -4216,18 +4232,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Pre-read the descriptors into the buffer cache */ for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); - sb_breadahead(sb, block); + sb_breadahead_unmovable(sb, block); } for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); - if (!sbi->s_group_desc[i]) { + bh = sb_bread_unmovable(sb, block); + if (!bh) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); } sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { @@ -4445,7 +4466,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. 
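
The group-descriptor allocation above switches from kvmalloc(db_count * sizeof(...)) to kvmalloc_array(), which refuses products that would overflow size_t instead of silently allocating a short buffer. Roughly (a sketch of the check, not the exact mm implementation):

static inline void *kvmalloc_array_sketch(size_t n, size_t size, gfp_t flags)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;			/* n * size would wrap */
	return kvmalloc(n * size, flags);
}
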
*/ - root = ext4_iget(sb, EXT4_ROOT_INO); + root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL); if (IS_ERR(root)) { ext4_msg(sb, KERN_ERR, "get root inode failed"); ret = PTR_ERR(root); @@ -4459,7 +4480,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } #ifdef CONFIG_UNICODE - if (sbi->s_encoding) + if (sb->s_encoding) sb->s_d_op = &ext4_dentry_ops; #endif @@ -4510,7 +4531,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); if (!err) - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -4598,13 +4619,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); - if (sbi->s_flex_groups) - kvfree(sbi->s_flex_groups); + rcu_read_lock(); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); @@ -4635,15 +4662,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif #ifdef CONFIG_QUOTA @@ -4697,7 +4727,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, * happen if we iget() an unused inode, as the subsequent iput() * will try to delete it. */ - journal_inode = ext4_iget(sb, journal_inum); + journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL); if (IS_ERR(journal_inode)) { ext4_msg(sb, KERN_ERR, "no journal found"); return NULL; @@ -5789,7 +5819,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, if (!qf_inums[type]) return -EPERM; - qf_inode = ext4_iget(sb, qf_inums[type]); + qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL); if (IS_ERR(qf_inode)) { ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); return PTR_ERR(qf_inode); diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index d0d8a9795dd627c517acca997d083b002ff3a1e0..bd717248a4bcbea8d1ef2b9fd58971dc14db4afa 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -342,12 +342,57 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf, return desc_size; } +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. 
+ */ +static void ext4_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) +{ + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + ext4_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + static struct page *ext4_read_merkle_tree_page(struct inode *inode, - pgoff_t index) + pgoff_t index, + unsigned long num_ra_pages) { + struct page *page; + index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + ext4_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f2fde3ac8698943fddeaf7a4c7b75eede099346f..b0873b89dc879ff695c3dd93ca4d22a2404029f7 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -383,7 +383,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, struct inode *inode; int err; - inode = ext4_iget(parent->i_sb, ea_ino); + inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { err = PTR_ERR(inode); ext4_error(parent->i_sb, @@ -1486,7 +1486,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, } while (ce) { - ea_inode = ext4_iget(inode->i_sb, ce->e_value); + ea_inode = ext4_iget(inode->i_sb, ce->e_value, + EXT4_IGET_NORMAL); if (!IS_ERR(ea_inode) && !is_bad_inode(ea_inode) && (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) && diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 9bb02d446d44a1a95502c8fadc05c26c43c839f4..675af7cd29d3357fc0377088375bedd0664ed019 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -129,3 +129,12 @@ config F2FS_FS_LZ4 default y help Support LZ4 compress algorithm, if unsure, say Y. + +config F2FS_FS_ZSTD + bool "ZSTD compression support" + depends on F2FS_FS_COMPRESSION + select ZSTD_COMPRESS + select ZSTD_DECOMPRESS + default y + help + Support ZSTD compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 311bb8091710f6038d8693f0a729dd9fa6c60724..1b0d57b0d00cfb68d4d5784ed2a0090d9ff11cc0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -50,9 +50,6 @@ struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) return page; } -/* - * We guarantee no failure on the returned page. 
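
fs/verity invokes this hook with a readahead hint sized from its own access pattern; on a cache miss the new ext4 implementation starts readahead for up to num_ra_pages tree pages before blocking in read_mapping_page(). An illustrative caller flow (not verbatim fs/verity code; vops stands in for the inode's fsverity_operations):

	page = vops->read_merkle_tree_page(inode, index, num_ra_pages);
	if (IS_ERR(page))
		return PTR_ERR(page);
	/* page is uptodate here; neighbouring tree pages are likely
	 * already in flight thanks to ext4_merkle_tree_readahead(). */
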
- */ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, bool is_meta) { @@ -89,6 +86,8 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, return ERR_PTR(err); } + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); + lock_page(page); if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); @@ -206,7 +205,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, } /* - * Readahead CP/NAT/SIT/SSA pages + * Readahead CP/NAT/SIT/SSA/POR pages */ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) @@ -223,6 +222,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .is_por = (type == META_POR), }; struct blk_plug plug; + int err; if (unlikely(type == META_POR)) fio.op_flags &= ~REQ_META; @@ -266,8 +266,11 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, } fio.page = page; - f2fs_submit_page_bio(&fio); - f2fs_put_page(page, 0); + err = f2fs_submit_page_bio(&fio); + f2fs_put_page(page, err ? 1 : 0); + + if (!err) + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); } out: blk_finish_plug(&plug); @@ -898,7 +901,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) return -ENOMEM; /* * Finding out valid cp block involves read both - * sets( cp pack1 and cp pack 2) + * sets( cp pack 1 and cp pack 2) */ cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); @@ -1245,20 +1248,20 @@ static void unblock_operations(struct f2fs_sb_info *sbi) f2fs_unlock_all(sbi); } -void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) { DEFINE_WAIT(wait); for (;;) { prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!get_pages(sbi, F2FS_WB_CP_DATA)) + if (!get_pages(sbi, type)) break; if (unlikely(f2fs_cp_error(sbi))) break; - io_schedule_timeout(5*HZ); + io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); } @@ -1296,10 +1299,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || - is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) + __set_ckpt_flags(ckpt, CP_RESIZEFS_FLAG); + else + __clear_ckpt_flags(ckpt, CP_RESIZEFS_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); else @@ -1379,13 +1386,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && - !f2fs_cp_error(sbi)); - /* - * modify checkpoint - * version number is already updated - */ + /* start to update checkpoint, cp ver is already updated previously */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { @@ -1488,11 +1490,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && - !f2fs_cp_error(sbi)); + /* Wait for all dirty meta pages to be submitted for IO 
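
DEFAULT_IO_TIMEOUT replaces the bare 5*HZ here (and the HZ/50 congestion waits later in this series) with one named constant. Its definition lives in f2fs.h, outside this hunk; the value below is an assumption based on the constant's use as a short retry interval:

/* Assumed definition, added to f2fs.h elsewhere in this series: */
#define DEFAULT_IO_TIMEOUT	(msecs_to_jiffies(500))
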
*/ + f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META); /* wait for previous submitted meta pages writeback */ - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); /* flush all device cache */ err = f2fs_flush_device_cache(sbi); @@ -1501,7 +1503,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* barrier and flush checkpoint cp pack 2 page if it can */ commit_checkpoint(sbi, ckpt, start_blk); - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); /* * invalidate intermediate page cache borrowed from meta inode which are @@ -1538,9 +1540,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0; } -/* - * We guarantee that this checkpoint procedure will not fail. - */ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1608,7 +1607,6 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_sit_entries(sbi, cpc); - /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); if (err) f2fs_release_discard_addrs(sbi); @@ -1621,7 +1619,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_RECOVERY) f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); - /* do checkpoint periodically */ + /* update CP_TIME to trigger checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index f335635bb1aa4663e9d6eed13396c771f19cc1a5..f5a747d8b044bbd881650dce1d96875afd609953 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -25,6 +26,8 @@ struct f2fs_compress_ops { int (*init_compress_ctx)(struct compress_ctx *cc); void (*destroy_compress_ctx)(struct compress_ctx *cc); int (*compress_pages)(struct compress_ctx *cc); + int (*init_decompress_ctx)(struct decompress_io_ctx *dic); + void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); int (*decompress_pages)(struct decompress_io_ctx *dic); }; @@ -57,7 +60,7 @@ bool f2fs_is_compressed_page(struct page *page) } static void f2fs_set_compressed_page(struct page *page, - struct inode *inode, pgoff_t index, void *data, refcount_t *r) + struct inode *inode, pgoff_t index, void *data) { SetPagePrivate(page); set_page_private(page, (unsigned long)data); @@ -65,8 +68,6 @@ static void f2fs_set_compressed_page(struct page *page, /* i_crypto_info and iv index */ page->index = index; page->mapping = inode->i_mapping; - if (r) - refcount_inc(r); } static void f2fs_put_compressed_page(struct page *page) @@ -296,6 +297,165 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = { }; #endif +#ifdef CONFIG_F2FS_FS_ZSTD +#define F2FS_ZSTD_DEFAULT_CLEVEL 1 + +static int zstd_init_compress_ctx(struct compress_ctx *cc) +{ + ZSTD_parameters params; + ZSTD_CStream *stream; + void *workspace; + unsigned int workspace_size; + + params = ZSTD_getParams(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen, 0); + workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams); + + workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + workspace_size, GFP_NOFS); + if (!workspace) + return -ENOMEM; + + stream = ZSTD_initCStream(params, 0, workspace, workspace_size); + if (!stream) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream 
failed\n",
+				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
+				__func__);
+		kvfree(workspace);
+		return -EIO;
+	}
+
+	cc->private = workspace;
+	cc->private2 = stream;
+
+	cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE;
+	return 0;
+}
+
+static void zstd_destroy_compress_ctx(struct compress_ctx *cc)
+{
+	kvfree(cc->private);
+	cc->private = NULL;
+	cc->private2 = NULL;
+}
+
+static int zstd_compress_pages(struct compress_ctx *cc)
+{
+	ZSTD_CStream *stream = cc->private2;
+	ZSTD_inBuffer inbuf;
+	ZSTD_outBuffer outbuf;
+	int src_size = cc->rlen;
+	int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE;
+	int ret;
+
+	inbuf.pos = 0;
+	inbuf.src = cc->rbuf;
+	inbuf.size = src_size;
+
+	outbuf.pos = 0;
+	outbuf.dst = cc->cbuf->cdata;
+	outbuf.size = dst_size;
+
+	ret = ZSTD_compressStream(stream, &outbuf, &inbuf);
+	if (ZSTD_isError(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
+				__func__, ZSTD_getErrorCode(ret));
+		return -EIO;
+	}
+
+	ret = ZSTD_endStream(stream, &outbuf);
+	if (ZSTD_isError(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n",
+				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
+				__func__, ZSTD_getErrorCode(ret));
+		return -EIO;
+	}
+
+	cc->clen = outbuf.pos;
+	return 0;
+}
+
+static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
+{
+	ZSTD_DStream *stream;
+	void *workspace;
+	unsigned int workspace_size;
+
+	workspace_size = ZSTD_DStreamWorkspaceBound(MAX_COMPRESS_WINDOW_SIZE);
+
+	workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
+					workspace_size, GFP_NOFS);
+	if (!workspace)
+		return -ENOMEM;
+
+	stream = ZSTD_initDStream(MAX_COMPRESS_WINDOW_SIZE,
+					workspace, workspace_size);
+	if (!stream) {
+		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
+				KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
+				__func__);
+		kvfree(workspace);
+		return -EIO;
+	}
+
+	dic->private = workspace;
+	dic->private2 = stream;
+
+	return 0;
+}
+
+static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic)
+{
+	kvfree(dic->private);
+	dic->private = NULL;
+	dic->private2 = NULL;
+}
+
+static int zstd_decompress_pages(struct decompress_io_ctx *dic)
+{
+	ZSTD_DStream *stream = dic->private2;
+	ZSTD_inBuffer inbuf;
+	ZSTD_outBuffer outbuf;
+	int ret;
+
+	inbuf.pos = 0;
+	inbuf.src = dic->cbuf->cdata;
+	inbuf.size = dic->clen;
+
+	outbuf.pos = 0;
+	outbuf.dst = dic->rbuf;
+	outbuf.size = dic->rlen;
+
+	ret = ZSTD_decompressStream(stream, &outbuf, &inbuf);
+	if (ZSTD_isError(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_decompressStream failed, ret: %d\n",
+				KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
+				__func__, ZSTD_getErrorCode(ret));
+		return -EIO;
+	}
+
+	if (dic->rlen != outbuf.pos) {
+		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, "
+				"expected:%lu\n", KERN_ERR,
+				F2FS_I_SB(dic->inode)->sb->s_id,
+				__func__, dic->rlen,
+				PAGE_SIZE << dic->log_cluster_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static const struct f2fs_compress_ops f2fs_zstd_ops = {
+	.init_compress_ctx = zstd_init_compress_ctx,
+	.destroy_compress_ctx = zstd_destroy_compress_ctx,
+	.compress_pages = zstd_compress_pages,
+	.init_decompress_ctx = zstd_init_decompress_ctx,
+	.destroy_decompress_ctx = zstd_destroy_decompress_ctx,
+	.decompress_pages = zstd_decompress_pages,
+};
+#endif
+
 static const struct f2fs_compress_ops
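
Because LZO and LZ4 keep no per-request state, the new init_decompress_ctx/destroy_decompress_ctx hooks (and the compress-side pair) are optional, and every call site guards them, as the f2fs_compress_pages() hunk below shows. The dispatch boils down to this sketch:

	const struct f2fs_compress_ops *cops =
		f2fs_cops[F2FS_I(cc->inode)->i_compress_algorithm];
	int ret = 0;

	if (cops->init_compress_ctx)
		ret = cops->init_compress_ctx(cc);	/* ZSTD: alloc workspace */
	if (!ret)
		ret = cops->compress_pages(cc);
	if (cops->destroy_compress_ctx)
		cops->destroy_compress_ctx(cc);		/* ZSTD: free workspace */
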
*f2fs_cops[COMPRESS_MAX] = { #else NULL, #endif +#ifdef CONFIG_F2FS_FS_ZSTD + &f2fs_zstd_ops, +#else + NULL, +#endif }; bool f2fs_is_compress_backend_ready(struct inode *inode) @@ -339,9 +504,11 @@ static int f2fs_compress_pages(struct compress_ctx *cc) trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, cc->cluster_size, fi->i_compress_algorithm); - ret = cops->init_compress_ctx(cc); - if (ret) - goto out; + if (cops->init_compress_ctx) { + ret = cops->init_compress_ctx(cc); + if (ret) + goto out; + } max_len = COMPRESS_HEADER_SIZE + cc->clen; cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); @@ -385,21 +552,27 @@ static int f2fs_compress_pages(struct compress_ctx *cc) } cc->cbuf->clen = cpu_to_le32(cc->clen); - cc->cbuf->chksum = cpu_to_le32(0); for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) cc->cbuf->reserved[i] = cpu_to_le32(0); + nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); + + /* zero out any unused part of the last page */ + memset(&cc->cbuf->cdata[cc->clen], 0, + (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE)); + vunmap(cc->cbuf); vunmap(cc->rbuf); - nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); - for (i = nr_cpages; i < cc->nr_cpages; i++) { f2fs_put_compressed_page(cc->cpages[i]); cc->cpages[i] = NULL; } + if (cops->destroy_compress_ctx) + cops->destroy_compress_ctx(cc); + cc->nr_cpages = nr_cpages; trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, @@ -418,7 +591,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) kfree(cc->cpages); cc->cpages = NULL; destroy_compress_ctx: - cops->destroy_compress_ctx(cc); + if (cops->destroy_compress_ctx) + cops->destroy_compress_ctx(cc); out: trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, cc->clen, ret); @@ -452,10 +626,16 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) goto out_free_dic; } + if (cops->init_decompress_ctx) { + ret = cops->init_decompress_ctx(dic); + if (ret) + goto out_free_dic; + } + dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL); if (!dic->rbuf) { ret = -ENOMEM; - goto out_free_dic; + goto destroy_decompress_ctx; } dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); @@ -478,7 +658,12 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) vunmap(dic->cbuf); out_vunmap_rbuf: vunmap(dic->rbuf); +destroy_decompress_ctx: + if (cops->destroy_decompress_ctx) + cops->destroy_decompress_ctx(dic); out_free_dic: + if (verity) + refcount_set(&dic->ref, dic->nr_cpages); if (!verity) f2fs_decompress_end_io(dic->rpages, dic->cluster_size, ret, false); @@ -537,8 +722,7 @@ static bool __cluster_may_compress(struct compress_ctx *cc) return true; } -/* return # of compressed block addresses */ -static int f2fs_compressed_blocks(struct compress_ctx *cc) +static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) { struct dnode_of_data dn; int ret; @@ -559,10 +743,15 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); - if (blkaddr != NULL_ADDR) - ret++; + if (compr) { + if (__is_valid_data_blkaddr(blkaddr)) + ret++; + } else { + if (blkaddr != NULL_ADDR) + ret++; + } } } fail: @@ -570,6 +759,18 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) return ret; } +/* return # of compressed blocks in compressed cluster */ +static int f2fs_compressed_blocks(struct 
compress_ctx *cc) +{ + return __f2fs_cluster_blocks(cc, true); +} + +/* return # of valid blocks in compressed cluster */ +static int f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) +{ + return __f2fs_cluster_blocks(cc, false); +} + int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { struct compress_ctx cc = { @@ -579,7 +780,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, }; - return f2fs_compressed_blocks(&cc); + return f2fs_cluster_blocks(&cc, false); } static bool cluster_may_compress(struct compress_ctx *cc) @@ -628,7 +829,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, bool prealloc; retry: - ret = f2fs_compressed_blocks(cc); + ret = f2fs_cluster_blocks(cc, false); if (ret <= 0) return ret; @@ -658,7 +859,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, struct bio *bio = NULL; ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, - &last_block_in_bio, false); + &last_block_in_bio, false, true); f2fs_destroy_compress_ctx(cc); if (ret) goto release_pages; @@ -777,7 +978,6 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .encrypted_page = NULL, .compressed_page = NULL, .submitted = false, - .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, .encrypted = f2fs_encrypted_file(cc->inode), @@ -790,16 +990,17 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi)) + return -EAGAIN; - f2fs_lock_op(sbi); + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); if (err) goto out_unlock_op; for (i = 0; i < cc->cluster_size; i++) { - if (datablock_addr(dn.inode, dn.node_page, + if (data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i) == NULL_ADDR) goto out_put_dnode; } @@ -818,7 +1019,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; - refcount_set(&cic->ref, 1); + refcount_set(&cic->ref, cc->nr_cpages); cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << cc->log_cluster_size, GFP_NOFS); if (!cic->rpages) @@ -828,8 +1029,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, - cc->rpages[i + 1]->index, - cic, i ? 
&cic->ref : NULL); + cc->rpages[i + 1]->index, cic); fio.compressed_page = cc->cpages[i]; if (fio.encrypted) { fio.page = cc->rpages[i + 1]; @@ -849,9 +1049,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->cluster_size; i++, dn.ofs_in_node++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, - dn.ofs_in_node); - fio.page = cic->rpages[i]; + blkaddr = f2fs_data_blkaddr(&dn); + fio.page = cc->rpages[i]; fio.old_blkaddr = blkaddr; /* cluster header */ @@ -899,12 +1098,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + if (!IS_NOQUOTA(inode)) + f2fs_unlock_op(sbi); - down_write(&fi->i_sem); + spin_lock(&fi->i_size_lock); if (fi->last_disk_size < psize) fi->last_disk_size = psize; - up_write(&fi->i_sem); + spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc); @@ -925,7 +1125,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: - f2fs_unlock_op(sbi); + if (!IS_NOQUOTA(inode)) + f2fs_unlock_op(sbi); return -EAGAIN; } @@ -990,24 +1191,30 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + /* + * for quota file, just redirty left pages to + * avoid deadlock caused by cluster update race + * from foreground operation. + */ + if (IS_NOQUOTA(cc->inode)) { + err = 0; + goto out_err; + } ret = 0; cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); lock_page(cc->rpages[i]); clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; } err = ret; - goto out_fail; + goto out_err; } *submitted += _submitted; } return 0; - -out_fail: - /* TODO: revoke partially updated block addresses */ - BUG_ON(compr_blocks); out_err: for (++i; i < cc->cluster_size; i++) { if (!cc->rpages[i]) @@ -1075,7 +1282,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; - refcount_set(&dic->ref, 1); + refcount_set(&dic->ref, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; dic->log_cluster_size = cc->log_cluster_size; @@ -1099,8 +1306,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) goto out_free; f2fs_set_compressed_page(page, cc->inode, - start_idx + i + 1, - dic, i ? 
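
Both cic and dic now start with their refcount equal to nr_cpages, instead of starting at 1 and taking one increment per attached page. Every per-page completion simply decrements, and whoever brings the count down from 1 performs the teardown, as the verify path in data.c below does:

	if (refcount_dec_not_one(&dic->ref))
		continue;	/* other compressed pages still in flight */
	/* last completer: */
	f2fs_verify_pages(dic->rpages, dic->cluster_size);
	f2fs_free_dic(dic);
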
&dic->ref : NULL); + start_idx + i + 1, dic); dic->cpages[i] = page; } @@ -1110,20 +1316,16 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) goto out_free; for (i = 0; i < dic->cluster_size; i++) { - if (cc->rpages[i]) + if (cc->rpages[i]) { + dic->tpages[i] = cc->rpages[i]; continue; + } dic->tpages[i] = f2fs_grab_page(); if (!dic->tpages[i]) goto out_free; } - for (i = 0; i < dic->cluster_size; i++) { - if (dic->tpages[i]) - continue; - dic->tpages[i] = cc->rpages[i]; - } - return dic; out_free: @@ -1139,7 +1341,10 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) continue; - f2fs_put_page(dic->tpages[i], 1); + if (!dic->tpages[i]) + continue; + unlock_page(dic->tpages[i]); + put_page(dic->tpages[i]); } kfree(dic->tpages); } @@ -1168,15 +1373,17 @@ void f2fs_decompress_end_io(struct page **rpages, if (!rpage) continue; - if (err || PageError(rpage)) { - ClearPageUptodate(rpage); - ClearPageError(rpage); - } else { - if (!verity || fsverity_verify_page(rpage)) - SetPageUptodate(rpage); - else - SetPageError(rpage); + if (err || PageError(rpage)) + goto clear_uptodate; + + if (!verity || fsverity_verify_page(rpage)) { + SetPageUptodate(rpage); + goto unlock; } +clear_uptodate: + ClearPageUptodate(rpage); + ClearPageError(rpage); +unlock: unlock_page(rpage); } } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b5b059f6379945fd4f83930ec9159c2296dfe2d3..ca538c1f5a26e846424f1f1756c07b4fa8ec086c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -56,17 +56,13 @@ static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask, return bio_alloc_bioset(gfp_mask, nr_iovecs, f2fs_bioset); } -struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail) +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio) { - struct bio *bio; - - if (no_fail) { + if (noio) { /* No failure on bio allocation */ - bio = __f2fs_bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = __f2fs_bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + return __f2fs_bio_alloc(GFP_NOIO, npages); } + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); return NULL; @@ -145,6 +141,8 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity) f2fs_decompress_pages(bio, page, verity); continue; } + if (verity) + continue; #endif /* PG_error was set if any post_read step failed */ @@ -193,12 +191,38 @@ static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size) static void f2fs_verify_bio(struct bio *bio) { - struct page *page = bio->bi_io_vec[0].bv_page; - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + struct decompress_io_ctx *dic; + + dic = (struct decompress_io_ctx *)page_private(page); + + if (dic) { + if (refcount_dec_not_one(&dic->ref)) + continue; + f2fs_verify_pages(dic->rpages, + dic->cluster_size); + f2fs_free_dic(dic); + continue; + } - f2fs_verify_pages(dic->rpages, dic->cluster_size); - f2fs_free_dic(dic); + if (bio->bi_status || PageError(page)) + goto clear_uptodate; + + if (fsverity_verify_page(page)) { + SetPageUptodate(page); + goto unlock; + } +clear_uptodate: + ClearPageUptodate(page); + ClearPageError(page); +unlock: + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); + unlock_page(page); + } } #endif @@ -374,9 +398,6 @@ static void f2fs_write_end_io(struct bio *bio) 
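
The f2fs_bio_alloc() change above can drop the __GFP_NOFAIL retry because __f2fs_bio_alloc() is backed by a bio_set: bio_alloc_bioset() with a reclaim-capable mask such as GFP_NOIO may block on the mempool but is expected to make forward progress, so the fallback was dead weight. The noio path therefore reduces to:

	/* May sleep on the f2fs_bioset mempool, but is not expected to fail: */
	bio = bio_alloc_bioset(GFP_NOIO, npages, f2fs_bioset);
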
bio_put(bio); } -/* - * Return true, if pre_bio's bdev is same as its target device. - */ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -413,6 +434,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) return 0; } +/* + * Return true, if pre_bio's bdev is same as its target device. + */ static bool __same_bdev(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -420,9 +444,6 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno; } -/* - * Low-level block read/write IO operations. - */ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) { struct f2fs_sb_info *sbi = fio->sbi; @@ -455,7 +476,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (type != DATA && type != NODE) goto submit_io; - if (test_opt(sbi, LFS) && current->plug) + if (f2fs_lfs_mode(sbi) && current->plug) blk_finish_plug(current->plug); if (F2FS_IO_ALIGNED(sbi)) @@ -528,6 +549,28 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi, __submit_bio(sbi, bio, type); } +static void __attach_data_io_flag(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; + unsigned int fua_flag = sbi->data_io_flag & temp_mask; + unsigned int meta_flag = (sbi->data_io_flag >> NR_TEMP_TYPE) & + temp_mask; + /* + * data io flag bits per temp: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | + */ + if (fio->type != DATA) + return; + + if ((1 << fio->temp) & meta_flag) + fio->op_flags |= REQ_META; + if ((1 << fio->temp) & fua_flag) + fio->op_flags |= REQ_FUA; +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -535,6 +578,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; + __attach_data_io_flag(fio); bio_set_op_attrs(io->bio, fio->op, fio->op_flags); if (is_read_io(fio->op)) @@ -1008,14 +1052,15 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned nr_pages, unsigned op_flag, - pgoff_t first_idx) + pgoff_t first_idx, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; - bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), + for_write); if (!bio) return ERR_PTR(-ENOMEM); f2fs_target_device(sbi, blkaddr, bio); @@ -1054,12 +1099,12 @@ static void f2fs_release_read_bio(struct bio *bio) /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, - block_t blkaddr) + block_t blkaddr, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1075,6 +1120,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); __f2fs_submit_read_bio(sbi, bio, DATA); return 0; } @@ -1134,8 +1180,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) f2fs_wait_on_page_writeback(dn->node_page, NODE, true, 
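
Per the bit-layout comment in __attach_data_io_flag() above, the low NR_TEMP_TYPE bits request REQ_FUA per temperature and the next NR_TEMP_TYPE bits request REQ_META. A worked example, assuming sbi->data_io_flag is set through the corresponding sysfs knob:

	/* 0x09 == 0b001001: bit 0 -> REQ_FUA for Hot data,
	 *                   bit 3 -> REQ_META for Hot data. */
	sbi->data_io_flag = 0x09;
	/* A fio with fio->type == DATA and fio->temp == HOT then gets
	 * REQ_META | REQ_FUA ORed into fio->op_flags. */
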
true); for (; count > 0; dn->ofs_in_node++) { - block_t blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { dn->data_blkaddr = NEW_ADDR; __set_data_blkaddr(dn); @@ -1249,7 +1294,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr); + err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write); if (err) goto put_err; return page; @@ -1387,8 +1432,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) if (err) return err; - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr != NULL_ADDR) goto alloc; @@ -1475,13 +1519,9 @@ void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) } /* - * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with - * f2fs_map_blocks structure. - * If original data blocks are allocated, then give them to blockdev. - * Otherwise, - * a. preallocate requested block addresses - * b. do not use extent cache for better performance - * c. give the block addresses to blockdev + * f2fs_map_blocks() tries to find or build mapping relationship which + * maps continuous logical blocks to physical blocks, and return such + * info via f2fs_map_blocks structure. */ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag) @@ -1509,7 +1549,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) goto next_dnode; @@ -1554,7 +1594,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: - blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { @@ -1564,7 +1604,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (__is_valid_data_blkaddr(blkaddr)) { /* use out-place-update for driect IO under LFS mode */ - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); if (err) @@ -2078,7 +2118,8 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, page->index); + is_readahead ? 
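Every datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node) call site in this patch collapses into a one-argument helper. Its definition, added to f2fs.h by a later hunk of this same patch, is reproduced here for context:

    static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn)
    {
            return data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node);
    }

The three-argument form survives, renamed to data_blkaddr(), for callers such as GC and roll-forward recovery that need an address from a node page other than dn->node_page.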
REQ_RAHEAD : 0, page->index, + false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2098,6 +2139,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = block_nr; goto out; @@ -2115,7 +2157,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead) + bool is_readahead, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; @@ -2131,7 +2173,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + last_block_in_file = (f2fs_readpage_limit(inode) + + blocksize - 1) >> blkbits; /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { @@ -2167,7 +2210,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) @@ -2196,7 +2239,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct page *page = dic->cpages[i]; block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); if (bio && (!page_is_mergeable(sbi, bio, @@ -2210,7 +2253,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, is_readahead ? 
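Bounding the cluster read by f2fs_readpage_limit() instead of i_size_read() matches the limit the non-compressed read path already uses (for fs-verity files it can extend past i_size). The round-up itself is ordinary ceiling division; a hedged equivalent using the generic kernel helper, given blocksize == 1 << blkbits:

    last_block_in_file = DIV_ROUND_UP(f2fs_readpage_limit(inode), blocksize);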
REQ_RAHEAD : 0, - page->index); + page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2233,6 +2276,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, goto submit_and_realloc; inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = blkaddr; } @@ -2311,7 +2355,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead); + is_readahead, false); f2fs_destroy_compress_ctx(&cc); if (ret) goto set_error_page; @@ -2354,7 +2398,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead); + is_readahead, false); f2fs_destroy_compress_ctx(&cc); } } @@ -2430,7 +2474,7 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { f2fs_flush_merged_writes(fio->sbi); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } @@ -2501,7 +2545,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return true; if (S_ISDIR(inode->i_mode)) return true; @@ -2719,8 +2763,8 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, f2fs_available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; - /* Dentry blocks are controlled by checkpoint */ - if (S_ISDIR(inode->i_mode)) { + /* Dentry/quota blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); goto done; @@ -2751,10 +2795,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (err) { file_set_keep_isize(inode); } else { - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } done: @@ -3021,7 +3065,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); congestion_wait(BLK_RW_ASYNC, - HZ/50); + DEFAULT_IO_TIMEOUT); goto retry_write; } goto next; @@ -3077,15 +3121,17 @@ static int f2fs_write_cache_pages(struct address_space *mapping, static inline bool __should_serialize_io(struct inode *inode, struct writeback_control *wbc) { + /* to avoid deadlock in path of data flush */ + if (F2FS_I(inode)->cp_task) + return false; + if (!S_ISREG(inode->i_mode)) return false; - if (f2fs_compressed_file(inode)) - return true; if (IS_NOQUOTA(inode)) return false; - /* to avoid deadlock in path of data flush */ - if (F2FS_I(inode)->cp_task) - return false; + + if (f2fs_compressed_file(inode)) + return true; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) @@ -3397,7 +3443,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, err = -EFSCORRUPTED; goto fail; } - err = f2fs_submit_page_read(inode, page, blkaddr); + err = f2fs_submit_page_read(inode, page, blkaddr, true); if (err) goto fail; @@ -3601,7 +3647,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 
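The congestion_wait(BLK_RW_ASYNC, HZ/50) calls that this patch converts to DEFAULT_IO_TIMEOUT (defined below as msecs_to_jiffies(20)) are not a pure rename. HZ/50 is integer division, so the old value was 20ms only where 50 divides HZ:

    /*
     *   HZ=1000: HZ/50 = 20 jiffies = 20ms     msecs_to_jiffies(20) = 20
     *   HZ=100:  HZ/50 =  2 jiffies = 20ms     msecs_to_jiffies(20) = 2
     *   HZ=30:   HZ/50 =  0 jiffies = no wait  msecs_to_jiffies(20) = 1
     */
    congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);

msecs_to_jiffies() rounds up and never returns 0, so the ENOMEM retry loops always back off, even on a low-HZ build (HZ=30 above is only illustrative).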
err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, rw == WRITE ? get_data_block_dio_write : get_data_block_dio, NULL, f2fs_dio_submit_bio, - DIO_LOCKING | DIO_SKIP_HOLES); + rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES : + DIO_SKIP_HOLES); if (do_opu) up_read(&fi->i_gc_rwsem[READ]); @@ -3619,6 +3666,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } + } else { + if (err > 0) + f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err); } out: if (trace_android_fs_dataread_start_enabled() && @@ -3998,7 +4048,7 @@ void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) int __init f2fs_init_bio_entry_cache(void) { - bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", + bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab", sizeof(struct bio_entry)); if (!bio_entry_slab) return -ENOMEM; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 77a82f93b54c1faa3e4173d1a14286baf2891508..05e9ad91167ed793b3417d92ce9e518af9b3a8d1 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -301,6 +301,9 @@ static int stat_show(struct seq_file *s, void *v) si->ssa_area_segs, si->main_area_segs); seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", si->overp_segs, si->rsvd_segs); + seq_printf(s, "Current Time Sec: %llu / Mounted Time Sec: %llu\n\n", + ktime_get_boottime_seconds(), + SIT_I(si->sbi)->mounted_time); if (test_opt(si->sbi, DISCARD)) seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n", si->utilization, si->valid_count, si->discard_blks); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1972638165fd2f84f42d23d28c4f86bace36052e..0898fff692594ed166560eec294919734c9d73a0 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -114,8 +114,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, const struct qstr *entry, bool quick) { - const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; int ret; if (quick) @@ -127,7 +127,7 @@ int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, /* Handle invalid character sequence as either an error * or as an opaque byte sequence. 
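Only the write side keeps DIO_LOCKING after the hunk above. In __blockdev_direct_IO() that flag makes the generic code take inode_lock around direct reads; dropping it for reads means f2fs DIO reads no longer serialize on the inode lock and rely on the i_gc_rwsem protection the caller already takes. A sketch of the resulting selection, with dio_flags as a hypothetical local:

    unsigned int dio_flags = DIO_SKIP_HOLES;

    if (rw == WRITE)
            dio_flags |= DIO_LOCKING;  /* reads: no inode_lock in fs/direct-io.c */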
*/ - if (f2fs_has_strict_mode(sbi)) + if (sb_has_enc_strict_mode(sb)) return -EINVAL; if (name->len != entry->len) @@ -154,7 +154,7 @@ static void f2fs_fname_setup_ci_filename(struct inode *dir, if (!cf_name->name) return; - cf_name->len = utf8_casefold(sbi->s_encoding, + cf_name->len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, F2FS_NAME_LEN); if ((int)cf_name->len <= 0) { @@ -173,7 +173,7 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, { #ifdef CONFIG_UNICODE struct inode *parent = d->inode; - struct f2fs_sb_info *sbi = F2FS_I_SB(parent); + struct super_block *sb = parent->i_sb; struct qstr entry; #endif @@ -184,7 +184,7 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, entry.name = d->filename[bit_pos]; entry.len = de->name_len; - if (sbi->s_encoding && IS_CASEFOLDED(parent)) { + if (sb->s_encoding && IS_CASEFOLDED(parent)) { if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; @@ -357,8 +357,8 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, int err; #ifdef CONFIG_UNICODE - if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) && - utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) { + if (sb_has_enc_strict_mode(dir->i_sb) && IS_CASEFOLDED(dir) && + utf8_validate(dir->i_sb->s_encoding, child)) { *res_page = ERR_PTR(-EINVAL); return NULL; } @@ -471,7 +471,6 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, struct page *dpage) { struct page *page; - int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -498,8 +497,7 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if ((IS_ENCRYPTED(dir) || dummy_encrypt) && - f2fs_may_encrypt(inode)) { + if (IS_ENCRYPTED(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; @@ -850,12 +848,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, 0); set_page_dirty(page); - dir->i_ctime = dir->i_mtime = current_time(dir); - f2fs_mark_inode_dirty_sync(dir, false); - - if (inode) - f2fs_drop_nlink(dir, inode); - if (bit_pos == NR_DENTRY_IN_BLOCK && !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_radix_tree_dirty_tag(page); @@ -867,6 +859,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_remove_dirty_inode(dir); } f2fs_put_page(page, 1); + + dir->i_ctime = dir->i_mtime = current_time(dir); + f2fs_mark_inode_dirty_sync(dir, false); + + if (inode) + f2fs_drop_nlink(dir, inode); } bool f2fs_empty_dir(struct inode *dir) @@ -1079,51 +1077,8 @@ const struct file_operations f2fs_dir_operations = { }; #ifdef CONFIG_UNICODE -static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - const struct dentry *parent = READ_ONCE(dentry->d_parent); - const struct inode *inode = READ_ONCE(parent->d_inode); - - if (!inode || !IS_CASEFOLDED(inode)) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } - - return f2fs_ci_compare(inode, name, &qstr, false); -} - -static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - const struct inode *inode = READ_ONCE(dentry->d_inode); - unsigned char *norm; - int len, ret = 0; - - if (!inode || !IS_CASEFOLDED(inode)) - return 
0; - - norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (f2fs_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kvfree(norm); - return ret; -} - const struct dentry_operations f2fs_dentry_ops = { - .d_hash = f2fs_d_hash, - .d_compare = f2fs_d_compare, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, }; #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5a673adc897aa9f9fcbded895c92704dc9419efc..a12a09565dc61f5c40e29c5e35e5e94a2acf1caf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -75,7 +75,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; /* * For mount options */ -#define F2FS_MOUNT_BG_GC 0x00000001 #define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 #define F2FS_MOUNT_DISCARD 0x00000004 #define F2FS_MOUNT_NOHEAP 0x00000008 @@ -89,11 +88,8 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 -#define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 -#define F2FS_MOUNT_ADAPTIVE 0x00020000 -#define F2FS_MOUNT_LFS 0x00040000 #define F2FS_MOUNT_USRQUOTA 0x00080000 #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 @@ -101,6 +97,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 #define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 +#define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -139,6 +136,8 @@ struct f2fs_mount_info { int whint_mode; int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ + int fs_mode; /* fs mode: LFS or ADAPTIVE */ + int bggc_mode; /* bggc mode: off, on or sync */ bool test_dummy_encryption; /* test dummy encryption */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint @@ -332,8 +331,8 @@ struct discard_policy { bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ bool ordered; /* issue discard by lba order */ + bool timeout; /* discard timeout for put_super */ unsigned int granularity; /* discard granularity */ - int timeout; /* discard timeout for put_super */ }; struct discard_cmd_control { @@ -428,6 +427,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) +#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -557,6 +557,9 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +/* congestion wait timeout value, default: 20ms */ +#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) + /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 @@ -673,6 +676,44 @@ enum { MAX_GC_FAILURE }; +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + 
FI_AUTO_RECOVER, /* indicate inode is recoverable */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_FREE_NID, /* free allocated nide */ + FI_NO_EXTENT, /* not to use the extent cache */ + FI_INLINE_XATTR, /* used for inline xattr */ + FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ + FI_APPEND_WRITE, /* inode has appended data */ + FI_UPDATE_WRITE, /* inode has in-place-update data */ + FI_NEED_IPU, /* used for ipu per file */ + FI_ATOMIC_FILE, /* indicate atomic file */ + FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ + FI_VOLATILE_FILE, /* indicate volatile file */ + FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ + FI_DROP_CACHE, /* drop dirty page cache */ + FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ + FI_DO_DEFRAG, /* indicate defragment is running */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ + FI_EXTRA_ATTR, /* indicate file has extra attribute */ + FI_PROJ_INHERIT, /* indicate file inherits projectid */ + FI_PIN_FILE, /* indicate file should not be gced */ + FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_MMAP_FILE, /* indicate file was mmapped */ + FI_MAX, /* max flag, never be used */ +}; + struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ @@ -685,7 +726,7 @@ struct f2fs_inode_info { umode_t i_acl_mode; /* keep file acl mode temporarily */ /* Use below internally in f2fs*/ - unsigned long flags; /* use to pass per-file flags */ + unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ struct rw_semaphore i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ @@ -694,6 +735,7 @@ struct f2fs_inode_info { struct task_struct *cp_task; /* separate cp/wb IO stats*/ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ + spinlock_t i_size_lock; /* protect last_disk_size */ #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; @@ -1043,8 +1085,9 @@ enum cp_reason_type { }; enum iostat_type { - APP_DIRECT_IO, /* app direct IOs */ - APP_BUFFERED_IO, /* app buffered IOs */ + /* WRITE IO */ + APP_DIRECT_IO, /* app direct write IOs */ + APP_BUFFERED_IO, /* app buffered write IOs */ APP_WRITE_IO, /* app write IOs */ APP_MAPPED_IO, /* app mapped IOs */ FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ @@ -1055,6 +1098,17 @@ enum iostat_type { FS_CP_DATA_IO, /* data IOs from checkpoint */ FS_CP_NODE_IO, /* node IOs from checkpoint */ FS_CP_META_IO, /* meta IOs from checkpoint */ + + /* READ IO */ + APP_DIRECT_READ_IO, /* app direct read IOs */ + APP_BUFFERED_READ_IO, /* app buffered read IOs */ + APP_READ_IO, /* app read IOs */ + APP_MAPPED_READ_IO, /* app mapped read IOs */ + FS_DATA_READ_IO, /* data read IOs */ + FS_NODE_READ_IO, /* node read IOs */ + FS_META_READ_IO, /* meta read IOs */ + + /* other */ FS_DISCARD, /* discard */ NR_IO_TYPE, }; @@ -1169,6 +1223,20 @@ enum { GC_URGENT, }; +enum { + BGGC_MODE_ON, /* 
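The reason fi->flags grows from a single word into an array is capacity: the relocated enum above now defines 33 flags (FI_NEW_INODE through FI_MMAP_FILE), one more than a 32-bit unsigned long can hold. BITS_TO_LONGS() sizes the array from FI_MAX, and the bit helpers then take the array directly:

    unsigned long flags[BITS_TO_LONGS(FI_MAX)];  /* 2 longs on 32-bit, 1 on 64-bit */

    bitmap_zero(fi->flags, FI_MAX);       /* replaces "fi->flags = 0" */
    set_bit(FI_NEW_INODE, fi->flags);     /* note: no '&' -- the array decays to a pointer */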
background gc is on */ + BGGC_MODE_OFF, /* background gc is off */ + BGGC_MODE_SYNC, /* + * background gc is on, migrating blocks + * like foreground gc + */ +}; + +enum { + FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ + FS_MODE_LFS, /* use lfs allocation only */ +}; + enum { WHINT_MODE_OFF, /* not pass down write hints */ WHINT_MODE_USER, /* try to pass down hints given by users */ @@ -1209,13 +1277,13 @@ enum fsync_mode { enum compress_algorithm_type { COMPRESS_LZO, COMPRESS_LZ4, + COMPRESS_ZSTD, COMPRESS_MAX, }; -#define COMPRESS_DATA_RESERVED_SIZE 4 +#define COMPRESS_DATA_RESERVED_SIZE 5 struct compress_data { __le32 clen; /* compressed data size */ - __le32 chksum; /* checksum of compressed data */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; @@ -1239,6 +1307,7 @@ struct compress_ctx { size_t rlen; /* valid data length in rbuf */ size_t clen; /* valid data length in cbuf */ void *private; /* payload buffer for specified compression algorithm */ + void *private2; /* extra payload buffer */ }; /* compress context for write IO path */ @@ -1268,11 +1337,14 @@ struct decompress_io_ctx { size_t clen; /* valid data length in cbuf */ refcount_t ref; /* referrence count of compressed page */ bool failed; /* indicate IO error during decompression */ + void *private; /* payload buffer for specified decompression algorithm */ + void *private2; /* extra payload buffer */ }; #define NULL_CLUSTER ((unsigned int)(~0)) #define MIN_COMPRESS_LOG_SIZE 2 #define MAX_COMPRESS_LOG_SIZE 8 +#define MAX_COMPRESS_WINDOW_SIZE ((PAGE_SIZE) << MAX_COMPRESS_LOG_SIZE) struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ @@ -1282,10 +1354,6 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ @@ -1441,8 +1509,14 @@ struct f2fs_sb_info { /* For app/fs IO statistics */ spinlock_t iostat_lock; - unsigned long long write_iostat[NR_IO_TYPE]; + unsigned long long rw_iostat[NR_IO_TYPE]; + unsigned long long prev_rw_iostat[NR_IO_TYPE]; bool iostat_enable; + unsigned long iostat_next_period; + unsigned int iostat_period_ms; + + /* to attach REQ_META|REQ_FUA flags */ + unsigned int data_io_flag; /* For sysfs suppport */ struct kobject s_kobj; @@ -1468,6 +1542,9 @@ struct f2fs_sb_info { __u32 s_chksum_seed; struct workqueue_struct *post_read_wq; /* post read workqueue */ + + struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ + unsigned int inline_xattr_slab_size; /* default inline xattr slab size */ }; struct f2fs_private_dio { @@ -2209,7 +2286,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, dquot_free_inode(inode); } else { if (unlikely(inode->i_blocks == 0)) { - f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + f2fs_warn(sbi, "dec_valid_node_count: inconsistent i_blocks, ino:%lu, iblocks:%llu", inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -2377,7 +2454,7 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t datablock_addr(struct inode *inode, +static inline block_t data_blkaddr(struct inode *inode, struct page *node_page, unsigned int offset) { struct f2fs_node 
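Removing the chksum field while bumping COMPRESS_DATA_RESERVED_SIZE from 4 to 5 is deliberate: the two changes cancel out, so the on-disk compressed-cluster header keeps its 24-byte size:

    struct compress_data {
            __le32 clen;         /* compressed data size */
            __le32 reserved[5];  /* was: __le32 chksum + __le32 reserved[4] */
            u8 cdata[];          /* compressed data */
    };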
*raw_node; @@ -2387,9 +2464,9 @@ static inline block_t datablock_addr(struct inode *inode, raw_node = F2FS_NODE(node_page); - /* from GC path only */ if (is_inode) { if (!inode) + /* from GC path only */ base = offset_in_addr(&raw_node->i); else if (f2fs_has_extra_attr(inode)) base = get_extra_isize(inode); @@ -2399,6 +2476,11 @@ static inline block_t datablock_addr(struct inode *inode, return le32_to_cpu(addr_array[base + offset]); } +static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) +{ + return data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node); +} + static inline int f2fs_test_bit(unsigned int nr, char *addr) { int mask; @@ -2496,43 +2578,6 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) return flags & F2FS_OTHER_FLMASK; } -/* used for f2fs_inode_info->flags */ -enum { - FI_NEW_INODE, /* indicate newly allocated inode */ - FI_DIRTY_INODE, /* indicate inode is dirty or not */ - FI_AUTO_RECOVER, /* indicate inode is recoverable */ - FI_DIRTY_DIR, /* indicate directory has dirty pages */ - FI_INC_LINK, /* need to increment i_nlink */ - FI_ACL_MODE, /* indicate acl mode */ - FI_NO_ALLOC, /* should not allocate any blocks */ - FI_FREE_NID, /* free allocated nide */ - FI_NO_EXTENT, /* not to use the extent cache */ - FI_INLINE_XATTR, /* used for inline xattr */ - FI_INLINE_DATA, /* used for inline data*/ - FI_INLINE_DENTRY, /* used for inline dentry */ - FI_APPEND_WRITE, /* inode has appended data */ - FI_UPDATE_WRITE, /* inode has in-place-update data */ - FI_NEED_IPU, /* used for ipu per file */ - FI_ATOMIC_FILE, /* indicate atomic file */ - FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ - FI_VOLATILE_FILE, /* indicate volatile file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ - FI_DROP_CACHE, /* drop dirty page cache */ - FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ - FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ - FI_HOT_DATA, /* indicate file is hot */ - FI_EXTRA_ATTR, /* indicate file has extra attribute */ - FI_PROJ_INHERIT, /* indicate file inherits projectid */ - FI_PIN_FILE, /* indicate file should not be gced */ - FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ - FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ - FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ - FI_MMAP_FILE, /* indicate file was mmapped */ -}; - static inline void __mark_inode_dirty_flag(struct inode *inode, int flag, bool set) { @@ -2547,27 +2592,24 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: - case FI_COMPRESSED_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } static inline void set_inode_flag(struct inode *inode, int flag) { - if (!test_bit(flag, &F2FS_I(inode)->flags)) - set_bit(flag, &F2FS_I(inode)->flags); + test_and_set_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, true); } static inline int is_inode_flag_set(struct inode *inode, int flag) { - return test_bit(flag, &F2FS_I(inode)->flags); + return test_bit(flag, F2FS_I(inode)->flags); } static inline void clear_inode_flag(struct inode *inode, int flag) { - if (test_bit(flag, &F2FS_I(inode)->flags)) - clear_bit(flag, &F2FS_I(inode)->flags); + test_and_clear_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, false); } @@ -2658,19 
+2700,19 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) struct f2fs_inode_info *fi = F2FS_I(inode); if (ri->i_inline & F2FS_INLINE_XATTR) - set_bit(FI_INLINE_XATTR, &fi->flags); + set_bit(FI_INLINE_XATTR, fi->flags); if (ri->i_inline & F2FS_INLINE_DATA) - set_bit(FI_INLINE_DATA, &fi->flags); + set_bit(FI_INLINE_DATA, fi->flags); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_bit(FI_INLINE_DENTRY, &fi->flags); + set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) - set_bit(FI_DATA_EXIST, &fi->flags); + set_bit(FI_DATA_EXIST, fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, &fi->flags); + set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) - set_bit(FI_EXTRA_ATTR, &fi->flags); + set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) - set_bit(FI_PIN_FILE, &fi->flags); + set_bit(FI_PIN_FILE, fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -2855,9 +2897,9 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) if (!f2fs_is_time_consistent(inode)) return false; - down_read(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); - up_read(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); return ret; } @@ -2966,29 +3008,45 @@ static inline int get_inline_xattr_addrs(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ +#define DEFAULT_IOSTAT_PERIOD_MS 3000 +#define MIN_IOSTAT_PERIOD_MS 100 +/* maximum period of iostat tracing is 1 day */ +#define MAX_IOSTAT_PERIOD_MS 8640000 + static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) { int i; spin_lock(&sbi->iostat_lock); - for (i = 0; i < NR_IO_TYPE; i++) - sbi->write_iostat[i] = 0; + for (i = 0; i < NR_IO_TYPE; i++) { + sbi->rw_iostat[i] = 0; + sbi->prev_rw_iostat[i] = 0; + } spin_unlock(&sbi->iostat_lock); } +extern void f2fs_record_iostat(struct f2fs_sb_info *sbi); + static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, enum iostat_type type, unsigned long long io_bytes) { if (!sbi->iostat_enable) return; spin_lock(&sbi->iostat_lock); - sbi->write_iostat[type] += io_bytes; + sbi->rw_iostat[type] += io_bytes; if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->write_iostat[APP_BUFFERED_IO] = - sbi->write_iostat[APP_WRITE_IO] - - sbi->write_iostat[APP_DIRECT_IO]; + sbi->rw_iostat[APP_BUFFERED_IO] = + sbi->rw_iostat[APP_WRITE_IO] - + sbi->rw_iostat[APP_DIRECT_IO]; + + if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) + sbi->rw_iostat[APP_BUFFERED_READ_IO] = + sbi->rw_iostat[APP_READ_IO] - + sbi->rw_iostat[APP_DIRECT_READ_IO]; spin_unlock(&sbi->iostat_lock); + + f2fs_record_iostat(sbi); } #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) @@ -3211,7 +3269,7 @@ void f2fs_drop_inmem_pages(struct inode *inode); void f2fs_drop_inmem_page(struct inode *inode, struct page *page); int f2fs_commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg); int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); @@ -3305,7 +3363,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); void f2fs_update_dirty_page(struct inode *inode, struct 
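Note that f2fs_update_iostat() never increments the buffered counters directly; they are maintained as derived values, and the read side added here mirrors the existing write-side identity:

    /* Invariants kept under iostat_lock:
     *   APP_BUFFERED_IO      == APP_WRITE_IO - APP_DIRECT_IO
     *   APP_BUFFERED_READ_IO == APP_READ_IO  - APP_DIRECT_READ_IO
     * Callers therefore only ever bump the totals and the direct counters:
     */
    f2fs_update_iostat(sbi, APP_READ_IO, ret);         /* f2fs_file_read_iter() */
    f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);  /* f2fs_direct_IO() */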
page *page); void f2fs_remove_dirty_inode(struct inode *inode); int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); -void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi); +void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); int __init f2fs_create_checkpoint_caches(void); @@ -3316,7 +3374,7 @@ void f2fs_destroy_checkpoint_caches(void); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail); +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_bio(struct f2fs_sb_info *sbi, @@ -3772,7 +3830,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead); + bool is_readahead, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_free_dic(struct decompress_io_ctx *dic); void f2fs_decompress_end_io(struct page **rpages, @@ -3809,6 +3867,7 @@ static inline void set_compress_context(struct inode *inode) F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline u64 f2fs_disable_compressed_file(struct inode *inode) @@ -3817,12 +3876,17 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) if (!f2fs_compressed_file(inode)) return 0; - if (fi->i_compr_blocks) - return fi->i_compr_blocks; + if (S_ISREG(inode->i_mode)) { + if (get_dirty_pages(inode)) + return 1; + if (fi->i_compr_blocks) + return fi->i_compr_blocks; + } fi->i_flags &= ~F2FS_COMPR_FL; - clear_inode_flag(inode, FI_COMPRESSED_FILE); stat_dec_compr_inode(inode); + clear_inode_flag(inode, FI_COMPRESSED_FILE); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } @@ -3899,31 +3963,25 @@ static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) return false; } - -static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) +static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) { - clear_opt(sbi, ADAPTIVE); - clear_opt(sbi, LFS); - - switch (mt) { - case F2FS_MOUNT_ADAPTIVE: - set_opt(sbi, ADAPTIVE); - break; - case F2FS_MOUNT_LFS: - set_opt(sbi, LFS); - break; - } + return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } -static inline bool f2fs_may_encrypt(struct inode *inode) +static inline bool f2fs_may_encrypt(struct inode *dir, struct inode *inode) { #ifdef CONFIG_FS_ENCRYPTION + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); umode_t mode = inode->i_mode; - return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); -#else - return false; + /* + * If the directory encrypted or dummy encryption enabled, + * then we should encrypt the inode. 
+ */ + if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) + return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); #endif + return false; } static inline bool f2fs_may_compress(struct inode *inode) @@ -3967,7 +4025,7 @@ static inline int allow_outplace_dio(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = iov_iter_rw(iter); - return (test_opt(sbi, LFS) && (rw == WRITE) && + return (f2fs_lfs_mode(sbi) && (rw == WRITE) && !block_unaligned_IO(inode, iocb, iter)); } @@ -3990,7 +4048,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, */ if (f2fs_sb_has_blkzoned(sbi)) return true; - if (test_opt(sbi, LFS) && (rw == WRITE)) { + if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { if (block_unaligned_IO(inode, iocb, iter)) return true; if (F2FS_IO_ALIGNED(sbi)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f5724c62334f339c19665d1117f37dd7668ae54b..b0c432c0fbb2aa71361c741e7485112baea316c1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -40,6 +40,10 @@ static int f2fs_filemap_fault(struct vm_fault *vmf) err = filemap_fault(vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + if (!err) + f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO, + F2FS_BLKSIZE); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)err); return err; @@ -106,13 +110,20 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) err = f2fs_get_block(&dn, page->index); f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); - if (err) { - unlock_page(page); - goto out_sem; - } } - /* fill the page */ +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (!need_alloc) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + f2fs_put_dnode(&dn); + } +#endif + if (err) { + unlock_page(page); + goto out_sem; + } + f2fs_wait_on_page_writeback(page, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ @@ -459,8 +470,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), @@ -804,6 +814,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, } flags = fi->i_flags; + if (flags & F2FS_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; if (IS_ENCRYPTED(inode)) @@ -815,7 +827,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, if (IS_VERITY(inode)) stat->attributes |= STATX_ATTR_VERITY; - stat->attributes_mask |= (STATX_ATTR_APPEND | + stat->attributes_mask |= (STATX_ATTR_COMPRESSED | + STATX_ATTR_APPEND | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP | @@ -943,10 +956,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); inode->i_mtime = inode->i_ctime = current_time(inode); F2FS_I(inode)->last_disk_size = i_size_read(inode); - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } __setattr_copy(inode, attr); @@ -1123,8 +1136,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { - *blkaddr = datablock_addr(dn.inode, - dn.node_page, 
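The STATX_ATTR_COMPRESSED bit that f2fs_getattr() now reports is observable from userspace via statx(2). A hedged example, assuming a libc that provides the statx wrapper (glibc 2.28+); report_compressed() is a hypothetical helper:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/stat.h>

    static void report_compressed(const char *path)
    {
            struct statx stx;

            if (statx(AT_FDCWD, path, 0, STATX_BASIC_STATS, &stx) != 0) {
                    perror("statx");
                    return;
            }
            if (stx.stx_attributes & STATX_ATTR_COMPRESSED)
                    printf("%s: stored compressed\n", path);
    }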
dn.ofs_in_node); + *blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(*blkaddr) && !f2fs_is_valid_blkaddr(sbi, *blkaddr, @@ -1135,7 +1147,7 @@ static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { - if (test_opt(sbi, LFS)) { + if (f2fs_lfs_mode(sbi)) { f2fs_put_dnode(&dn); return -EOPNOTSUPP; } @@ -1213,8 +1225,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { - dn.data_blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + dn.data_blkaddr = f2fs_data_blkaddr(&dn); f2fs_truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { @@ -1390,8 +1401,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, int ret; for (; index < end; index++, dn->ofs_in_node++) { - if (datablock_addr(dn->inode, dn->node_page, - dn->ofs_in_node) == NULL_ADDR) + if (f2fs_data_blkaddr(dn) == NULL_ADDR) count++; } @@ -1402,8 +1412,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->ofs_in_node = ofs_in_node; for (index = start; index < end; index++, dn->ofs_in_node++) { - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); /* * f2fs_reserve_new_blocks will not guarantee entire block * allocation. @@ -1801,12 +1810,15 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); + u32 masked_flags = fi->i_flags & mask; + + f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) { + if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) return -EOPNOTSUPP; if (!f2fs_empty_dir(inode)) @@ -1820,27 +1832,22 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) return -EINVAL; } - if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) { - if (S_ISREG(inode->i_mode) && - (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) || - F2FS_HAS_BLOCKS(inode))) - return -EINVAL; + if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { + if (masked_flags & F2FS_COMPR_FL) { + if (f2fs_disable_compressed_file(inode)) + return -EINVAL; + } if (iflags & F2FS_NOCOMP_FL) return -EINVAL; if (iflags & F2FS_COMPR_FL) { - int err = f2fs_convert_inline_inode(inode); - - if (err) - return err; - if (!f2fs_may_compress(inode)) return -EINVAL; set_compress_context(inode); } } - if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) { - if (fi->i_flags & F2FS_COMPR_FL) + if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { + if (masked_flags & F2FS_COMPR_FL) return -EINVAL; } @@ -3345,6 +3352,21 @@ static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) return fsverity_ioctl_measure(filp, (void __user *)arg); } +static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u64 blocks; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + blocks = F2FS_I(inode)->i_compr_blocks; + return put_user(blocks, (u64 __user *)arg); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3419,6 +3441,8 @@ long 
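From userspace the new F2FS_IOC_GET_COMPRESS_BLOCKS is a plain _IOR call that copies out a __u64. A sketch follows; the 0xf5 magic is F2FS_IOCTL_MAGIC from f2fs.h but should be verified against your kernel headers, and print_compress_blocks() is a hypothetical helper:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>

    #define F2FS_IOCTL_MAGIC              0xf5
    #define F2FS_IOC_GET_COMPRESS_BLOCKS  _IOR(F2FS_IOCTL_MAGIC, 17, __u64)

    static int print_compress_blocks(int fd)
    {
            __u64 blocks;

            if (ioctl(fd, F2FS_IOC_GET_COMPRESS_BLOCKS, &blocks) != 0)
                    return -1;  /* -EOPNOTSUPP or -EINVAL per the checks above */
            printf("%llu compressed blocks\n", (unsigned long long)blocks);
            return 0;
    }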
f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_enable_verity(filp, arg); case FS_IOC_MEASURE_VERITY: return f2fs_ioc_measure_verity(filp, arg); + case F2FS_IOC_GET_COMPRESS_BLOCKS: + return f2fs_get_compress_blocks(filp, arg); default: return -ENOTTY; } @@ -3428,11 +3452,17 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + int ret; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - return generic_file_read_iter(iocb, iter); + ret = generic_file_read_iter(iocb, iter); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret); + + return ret; } static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) @@ -3446,8 +3476,10 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - if (!f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) { @@ -3573,6 +3605,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_RESIZE_FS: case FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: + case F2FS_IOC_GET_COMPRESS_BLOCKS: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cf4cc0dce2a12077ce95ef705d73e907642900a9..0c03f7d05fa6f6cc6c4e4eeccd103ca98107bae3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -31,6 +31,8 @@ static int gc_thread_func(void *data) set_freezable(); do { + bool sync_mode; + wait_event_interruptible_timeout(*wq, kthread_should_stop() || freezing(current) || gc_th->gc_wake, @@ -101,15 +103,17 @@ static int gc_thread_func(void *data) do_gc: stat_inc_bggc_count(sbi->stat_info); + sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, prefree_segments(sbi), free_segments(sbi)); /* balancing f2fs's metadata periodically */ - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, true); next: sb_end_write(sbi->sb); @@ -192,7 +196,10 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - /* we need to check every dirty segments in the FG_GC case */ + /* + * adjust candidates range, should select all dirty segments for + * foreground GC and urgent GC cases. 
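The GC thread now derives its sync behaviour from the bggc_mode enum instead of the removed option bits; as far as the removed flags suggest, the mapping from the old background_gc= mount values is:

    /* background_gc=on   (was F2FS_MOUNT_BG_GC)               -> BGGC_MODE_ON
     * background_gc=off  (neither bit set)                    -> BGGC_MODE_OFF
     * background_gc=sync (was BG_GC + F2FS_MOUNT_FORCE_FG_GC) -> BGGC_MODE_SYNC
     */
    sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;
    if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO))  /* nonzero: no victim */
            wait_ms = gc_th->no_gc_sleep_time;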
+ */ if (gc_type != FG_GC && (sbi->gc_mode != GC_URGENT) && p->max_search > sbi->max_victim_search) @@ -634,7 +641,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } *nofs = ofs_of_node(node_page); - source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); + source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); if (source_blkaddr != blkaddr) { @@ -730,6 +737,9 @@ static int ra_data_block(struct inode *inode, pgoff_t index) goto put_encrypted_page; f2fs_put_page(fio.encrypted_page, 0); f2fs_put_page(page, 1); + + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + return 0; put_encrypted_page: f2fs_put_page(fio.encrypted_page, 1); @@ -762,7 +772,7 @@ static int move_data_block(struct inode *inode, block_t bidx, struct page *page, *mpage; block_t newaddr; int err = 0; - bool lfs_mode = test_opt(fio.sbi, LFS); + bool lfs_mode = f2fs_lfs_mode(fio.sbi); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); @@ -834,6 +844,9 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_put_page(mpage, 1); goto up_out; } + + f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || !PageUptodate(mpage))) { @@ -971,7 +984,8 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, if (err) { clear_cold_data(page); if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } if (is_dirty) @@ -1019,8 +1033,8 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, * race condition along with SSR block allocation. */ if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || - get_valid_blocks(sbi, segno, false) == - sbi->blocks_per_seg) + get_valid_blocks(sbi, segno, true) == + BLKS_PER_SEC(sbi)) return submitted; if (check_valid_map(sbi, segno, off) == 0) @@ -1204,7 +1218,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (get_valid_blocks(sbi, segno, false) == 0) goto freed; - if (__is_large_section(sbi) && + if (gc_type == BG_GC && __is_large_section(sbi) && migrated >= sbi->migration_granularity) goto skip; if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) @@ -1234,12 +1248,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); + migrated++; freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; - migrated++; if (__is_large_section(sbi) && segno + 1 < end_segno) sbi->next_victim_seg[gc_type] = segno + 1; @@ -1435,12 +1449,19 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) { struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); - int section_count = le32_to_cpu(raw_sb->section_count); - int segment_count = le32_to_cpu(raw_sb->segment_count); - int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); - long long block_count = le64_to_cpu(raw_sb->block_count); + int section_count; + int segment_count; + int segment_count_main; + long long block_count; int segs = secs * sbi->segs_per_sec; + down_write(&sbi->sb_lock); + + section_count = le32_to_cpu(raw_sb->section_count); + segment_count = le32_to_cpu(raw_sb->segment_count); + segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + block_count = le64_to_cpu(raw_sb->block_count); + raw_sb->section_count = 
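The gc_data_segment() hunk above fixes a units mismatch: get_valid_blocks(..., false) counts a single segment, but GC reclaims whole sections, and migrating a fully-valid section frees no space at all. The corrected test compares at section granularity:

    /* BLKS_PER_SEC(sbi) == segs_per_sec * blocks_per_seg; the 'true'
     * argument makes get_valid_blocks() sum the whole section. */
    if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
        get_valid_blocks(sbi, segno, true) == BLKS_PER_SEC(sbi))
            return submitted;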
cpu_to_le32(section_count + secs); raw_sb->segment_count = cpu_to_le32(segment_count + segs); raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); @@ -1454,6 +1475,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->devs[last_dev].total_segments = cpu_to_le32(dev_segs + segs); } + + up_write(&sbi->sb_lock); } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) @@ -1571,11 +1594,17 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) goto out; } + mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, -secs); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->cp_mutex); + err = f2fs_sync_fs(sbi->sb, 1); if (err) { + mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, secs); + mutex_unlock(&sbi->cp_mutex); update_sb_metadata(sbi, secs); f2fs_commit_super(sbi, false); } diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 5bc4dcd8fc03fbbb97ece6233c925634dc972fa8..28acb24e7a7a8dd26232239cb63ca81cbcfb745c 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -110,7 +110,7 @@ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, { #ifdef CONFIG_UNICODE struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr folded; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 12f5649febeafc31bf168da9e7050fd5a62b2938..482d267f4a639cd955523378e341c5283a3d4ace 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -291,13 +291,30 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_log_cluster_size)) { - if (ri->i_compress_algorithm >= COMPRESS_MAX) + if (ri->i_compress_algorithm >= COMPRESS_MAX) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " + "compress algorithm: %u, run fsck to fix", + __func__, inode->i_ino, + ri->i_compress_algorithm); return false; - if (le64_to_cpu(ri->i_compr_blocks) > inode->i_blocks) + } + if (le64_to_cpu(ri->i_compr_blocks) > + SECTOR_TO_BLOCK(inode->i_blocks)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent " + "i_compr_blocks:%llu, i_blocks:%lu, run fsck to fix", + __func__, inode->i_ino, + le64_to_cpu(ri->i_compr_blocks), + SECTOR_TO_BLOCK(inode->i_blocks)); return false; + } if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || - ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) + ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " + "log cluster size: %u, run fsck to fix", + __func__, inode->i_ino, + ri->i_log_cluster_size); return false; + } } return true; @@ -345,7 +362,7 @@ static int do_read_inode(struct inode *inode) fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) fi->i_flags &= ~F2FS_PROJINHERIT_FL; - fi->flags = 0; + bitmap_zero(fi->flags, FI_MAX); fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; @@ -518,7 +535,7 @@ struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino) inode = f2fs_iget(sb, ino); if (IS_ERR(inode)) { if (PTR_ERR(inode) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } } @@ -762,7 +779,7 @@ void f2fs_evict_inode(struct inode *inode) f2fs_bug_on(sbi, 1); } - /* ino == 0, if f2fs_new_inode() was failed t*/ + /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ 
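The stricter i_compr_blocks sanity check also fixes mixed units: the VFS keeps inode->i_blocks in 512-byte sectors, while i_compr_blocks counts filesystem blocks, so the old bound was eight times too permissive on a 4KiB-block filesystem. SECTOR_TO_BLOCK() normalizes first:

    /* With 4KiB blocks, SECTOR_TO_BLOCK(n) == n >> 3 (8 sectors per block);
     * e.g. i_blocks == 80 sectors -> 10 fs blocks, the correct upper
     * bound for i_compr_blocks. */
    if (le64_to_cpu(ri->i_compr_blocks) > SECTOR_TO_BLOCK(inode->i_blocks))
            return false;  /* inconsistent; the new message asks for fsck */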
if (inode->i_ino) invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 76ba1e2fee480123944df8744984210866dabedc..23d6cccdb4c1556b03456968ff8511a7129889a6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -75,9 +75,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); - /* If the directory encrypted, then we should encrypt the inode. */ - if ((IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && - f2fs_may_encrypt(inode)) + if (f2fs_may_encrypt(dir, inode)) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi)) { @@ -177,7 +175,7 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) } /* - * Set multimedia files as cold files for hot/cold data separation + * Set file's temperature for hot/cold data separation */ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) @@ -876,12 +874,6 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { - int err = fscrypt_get_encryption_info(dir); - if (err) - return err; - } - return __f2fs_tmpfile(dir, dentry, mode, NULL); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dcacdab66558811d832d42c9e68e45bd6ebf168a..1e80fa16fb01be745dd915968c93cd073aca532d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -510,9 +510,6 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -/* - * This function always returns success - */ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { @@ -714,8 +711,7 @@ static int get_node_path(struct inode *inode, long block, /* * Caller should call f2fs_put_dnode(dn). * Also, it should grab and release a rwsem by calling f2fs_lock_op() and - * f2fs_unlock_op() only if ro is not set RDONLY_NODE. - * In the case of RDONLY_NODE, we don't need to care about mutex. + * f2fs_unlock_op() only if mode is set with ALLOC_NODE. 
*/ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { @@ -807,8 +803,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) dn->nid = nids[level]; dn->ofs_in_node = offset[level]; dn->node_page = npage[level]; - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); return 0; release_pages: @@ -1184,8 +1179,9 @@ int f2fs_remove_inode_page(struct inode *inode) } if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { - f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, (unsigned long long)inode->i_blocks); + f2fs_warn(F2FS_I_SB(inode), + "f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); } @@ -1300,7 +1296,13 @@ static int read_node_page(struct page *page, int op_flags) } fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; - return f2fs_submit_page_bio(&fio); + + err = f2fs_submit_page_bio(&fio); + + if (!err) + f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE); + + return err; } /* @@ -1560,15 +1562,16 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; - set_page_writeback(page); - ClearPageError(page); - + /* should add to global list before clearing PAGECACHE status */ if (f2fs_in_warm_node_list(sbi, page)) { seq = f2fs_add_fsync_node_entry(sbi, page); if (seq_id) *seq_id = seq; } + set_page_writeback(page); + ClearPageError(page); + fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); @@ -1977,7 +1980,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, goto skip_write; /* balancing f2fs's metadata in background */ - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, true); /* collect a number of dirty node pages and write together */ if (wbc->sync_mode != WB_SYNC_ALL && @@ -2594,7 +2597,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) retry: ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); if (!ipage) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } @@ -3184,22 +3187,22 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) int __init f2fs_create_node_manager_caches(void) { - nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry", sizeof(struct nat_entry)); if (!nat_entry_slab) goto fail; - free_nid_slab = f2fs_kmem_cache_create("free_nid", + free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid", sizeof(struct free_nid)); if (!free_nid_slab) goto destroy_nat_entry; - nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", + nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) goto destroy_free_nid; - fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry", + fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry", sizeof(struct fsync_node_entry)); if (!fsync_node_entry_slab) goto destroy_nat_entry_set; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f177e2749f19c487db65865bc48ad681487725b9..5288a6f71ca24b3353eb2ed63e594ab614d133c3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -496,8 +496,7 @@ static int 
check_index_in_prev_nodes(struct f2fs_sb_info *sbi, return 0; truncate_out: - if (datablock_addr(tdn.inode, tdn.node_page, - tdn.ofs_in_node) == blkaddr) + if (f2fs_data_blkaddr(&tdn) == blkaddr) f2fs_truncate_data_blocks_range(&tdn, 1); if (dn->inode->i_ino == nid && !dn->inode_page_locked) unlock_page(dn->inode_page); @@ -535,7 +534,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry_dn; } goto out; @@ -560,8 +559,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, for (; start < end; start++, dn.ofs_in_node++) { block_t src, dest; - src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - dest = datablock_addr(dn.inode, page, dn.ofs_in_node); + src = f2fs_data_blkaddr(&dn); + dest = data_blkaddr(dn.inode, page, dn.ofs_in_node); if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { @@ -618,7 +617,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err = check_index_in_prev_nodes(sbi, dest, &dn); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry_prev; } goto err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6b2538589c0731b790b9815e720cfab0a220da23..6da162a71105a9ecc3494d765da014e300396482 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -172,7 +172,7 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return false; if (sbi->gc_mode == GC_URGENT) return true; @@ -245,7 +245,8 @@ static int __revoke_inmem_pages(struct inode *inode, LOOKUP_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -310,7 +311,7 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) skip: iput(inode); } - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); cond_resched(); if (gc_failure) { if (++looped >= count) @@ -413,7 +414,8 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) err = f2fs_do_write_data_page(&fio); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -492,7 +494,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) /* balance_fs_bg is able to be pending */ if (need && excess_cached_nats(sbi)) - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, false); if (!f2fs_is_checkpoint_ready(sbi)) return; @@ -507,7 +509,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } } -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) { if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return; @@ -536,7 +538,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) || f2fs_time_over(sbi, CP_TIME)) { - if (test_opt(sbi, DATA_FLUSH)) { + if (test_opt(sbi, DATA_FLUSH) && from_bg) { struct blk_plug plug; mutex_lock(&sbi->flush_lock); @@ -1025,9 +1027,9 @@ static void f2fs_submit_discard_endio(struct bio *bio) struct discard_cmd 
*dc = (struct discard_cmd *)bio->bi_private; unsigned long flags; - dc->error = blk_status_to_errno(bio->bi_status); - spin_lock_irqsave(&dc->lock, flags); + if (!dc->error) + dc->error = blk_status_to_errno(bio->bi_status); dc->bio_ref--; if (!dc->bio_ref && dc->state == D_SUBMIT) { dc->state = D_DONE; @@ -1076,7 +1078,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; dpolicy->io_aware_gran = MAX_PLIST_NUM; - dpolicy->timeout = 0; + dpolicy->timeout = false; if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@ -1097,10 +1099,10 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ dpolicy->granularity = 1; + dpolicy->timeout = true; } } @@ -1210,8 +1212,10 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, len = total_len; } - if (!err && len) + if (!err && len) { + dcc->undiscard_blks -= len; __update_discard_tree_range(sbi, bdev, lstart, start, len); + } return err; } @@ -1458,6 +1462,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, return issued; } +static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy); static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) @@ -1466,15 +1472,17 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, issued = 0; + int i, issued; bool io_interrupted = false; - if (dpolicy->timeout != 0) - f2fs_update_time(sbi, dpolicy->timeout); + if (dpolicy->timeout) + f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); +retry: + issued = 0; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { - if (dpolicy->timeout != 0 && - f2fs_time_over(sbi, dpolicy->timeout)) + if (dpolicy->timeout && + f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (i + 1 < dpolicy->granularity) @@ -1495,8 +1503,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - if (dpolicy->timeout != 0 && - f2fs_time_over(sbi, dpolicy->timeout)) + if (dpolicy->timeout && + f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (dpolicy->io_aware && i < dpolicy->io_aware_gran && @@ -1518,6 +1526,11 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, break; } + if (dpolicy->type == DPOLICY_UMOUNT && issued) { + __wait_all_discard_cmd(sbi, dpolicy); + goto retry; + } + if (!issued && io_interrupted) issued = -1; @@ -1675,7 +1688,6 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); - dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT; __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); @@ -1937,7 +1949,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); @@ -1969,7 +1981,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, (end - 1) <= cpc->trim_end) continue; - if 
(!test_opt(sbi, LFS) || !__is_large_section(sbi)) { + if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -2798,7 +2810,7 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto next; } skip: @@ -2827,7 +2839,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -3190,7 +3202,7 @@ static void update_device_state(struct f2fs_io_info *fio) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); - bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); + bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); if (keep_order) down_read(&fio->sbi->io_order_lock); @@ -4068,7 +4080,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = ktime_get_real_seconds(); + sit_i->mounted_time = ktime_get_boottime_seconds(); init_rwsem(&sit_i->sentry_lock); return 0; } @@ -4418,7 +4430,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; - if (!test_opt(sbi, LFS)) + if (!f2fs_lfs_mode(sbi)) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; @@ -4570,22 +4582,22 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) int __init f2fs_create_segment_manager_caches(void) { - discard_entry_slab = f2fs_kmem_cache_create("discard_entry", + discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry", sizeof(struct discard_entry)); if (!discard_entry_slab) goto fail; - discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd", + discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd", sizeof(struct discard_cmd)); if (!discard_cmd_slab) goto destroy_discard_entry; - sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", + sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destroy_discard_cmd; - inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", + inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry", sizeof(struct inmem_pages)); if (!inmem_entry_slab) goto destroy_sit_entry_set; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 459dc3901a574c8a1cc66ef265e5c13dea58bb8e..c476e7ca35957326ecbc29ba85d42895695e521f 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -752,11 +752,16 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) #endif } +static inline time64_t ktime_get_boottime_seconds(void) +{ + return ktime_divns(ktime_get_boottime(), NSEC_PER_SEC); +} + static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi, bool base_time) { struct sit_info 
*sit_i = SIT_I(sbi); - time64_t diff, now = ktime_get_real_seconds(); + time64_t diff, now = ktime_get_boottime_seconds(); if (now >= sit_i->mounted_time) return sit_i->elapsed_time + now - sit_i->mounted_time; diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index a467aca29cfefd8a25a16b125356debc54850e33..d66de5999a26d8bb4b147468e071203c82b70982 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -58,7 +58,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, /* count extent cache entries */ count += __count_extent_cache(sbi); - /* shrink clean nat cache entries */ + /* count clean nat cache entries */ count += __count_nat_entries(sbi); /* count free nids cache entries */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5b4babc7bc32229c78478d8b12d3bc3caee81afb..ecef87cee77dd1717e72b0f1f0284075ecbbfd11 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -427,14 +427,11 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; if (strlen(name) == 2 && !strncmp(name, "on", 2)) { - set_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { - clear_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) { - set_opt(sbi, BG_GC); - set_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; } else { kvfree(name); return -EINVAL; @@ -446,7 +443,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_norecovery: /* this option mounts f2fs with ro */ - set_opt(sbi, DISABLE_ROLL_FORWARD); + set_opt(sbi, NORECOVERY); if (!f2fs_readonly(sb)) return -EINVAL; break; @@ -600,10 +597,10 @@ static int parse_options(struct super_block *sb, char *options) kvfree(name); return -EINVAL; } - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (strlen(name) == 3 && !strncmp(name, "lfs", 3)) { - set_opt_mode(sbi, F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; } else { kvfree(name); return -EINVAL; @@ -832,6 +829,10 @@ static int parse_options(struct super_block *sb, char *options) !strcmp(name, "lz4")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; + } else if (strlen(name) == 4 && + !strcmp(name, "zstd")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_ZSTD; } else { kfree(name); return -EINVAL; @@ -904,7 +905,7 @@ static int parse_options(struct super_block *sb, char *options) } #endif - if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { + if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", F2FS_IO_SIZE_KB(sbi)); return -EINVAL; @@ -934,7 +935,7 @@ static int parse_options(struct super_block *sb, char *options) } } - if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); return -EINVAL; } @@ -960,6 +961,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); init_rwsem(&fi->i_sem); + spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); @@ -1201,7 +1203,7 @@ static void f2fs_put_super(struct super_block *sb) /* our cp_error case, we can wait for any writeback page */ 
f2fs_flush_merged_writes(sbi); - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); f2fs_bug_on(sbi, sbi->fsync_node_num); @@ -1233,6 +1235,7 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->raw_super); destroy_device_list(sbi); + f2fs_destroy_xattr_caches(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -1242,7 +1245,7 @@ static void f2fs_put_super(struct super_block *sb) for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kvfree(sbi); } @@ -1449,6 +1452,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, case COMPRESS_LZ4: algtype = "lz4"; break; + case COMPRESS_ZSTD: + algtype = "zstd"; + break; } seq_printf(seq, ",compress_algorithm=%s", algtype); @@ -1465,16 +1471,17 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) { - if (test_opt(sbi, FORCE_FG_GC)) - seq_printf(seq, ",background_gc=%s", "sync"); - else - seq_printf(seq, ",background_gc=%s", "on"); - } else { + if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) + seq_printf(seq, ",background_gc=%s", "sync"); + else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_ON) + seq_printf(seq, ",background_gc=%s", "on"); + else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); - } + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, NORECOVERY)) + seq_puts(seq, ",norecovery"); if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); else @@ -1526,9 +1533,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",data_flush"); seq_puts(seq, ",mode="); - if (test_opt(sbi, ADAPTIVE)) + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_ADAPTIVE) seq_puts(seq, "adaptive"); - else if (test_opt(sbi, LFS)) + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) @@ -1599,11 +1606,11 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); @@ -1615,9 +1622,9 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi)) - set_opt_mode(sbi, F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; else - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); @@ -1686,7 +1693,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) out_unlock: up_write(&sbi->gc_lock); restore_flag: - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } @@ -1809,7 +1816,8 @@ static int f2fs_remount(struct 
super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if ((*flags & MS_RDONLY) || + F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; @@ -1914,7 +1922,8 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); if (IS_ERR(page)) { if (PTR_ERR(page) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto repeat; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); @@ -1956,6 +1965,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, int offset = off & (sb->s_blocksize - 1); size_t towrite = len; struct page *page; + void *fsdata = NULL; char *kaddr; int err = 0; int tocopy; @@ -1965,10 +1975,11 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, towrite); retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, - &page, NULL); + &page, &fsdata); if (unlikely(err)) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); @@ -1981,7 +1992,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, flush_dcache_page(page); a_ops->write_end(NULL, mapping, off, tocopy, tocopy, - page, NULL); + page, fsdata); offset = 0; towrite -= tocopy; off += tocopy; @@ -3274,7 +3285,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) { #ifdef CONFIG_UNICODE - if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) { + if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) { const struct f2fs_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3305,8 +3316,8 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sbi->sb->s_encoding = encoding; + sbi->sb->s_encoding_flags = encoding_flags; sbi->sb->s_d_op = &f2fs_dentry_ops; } #else @@ -3463,6 +3474,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* init iostat info */ spin_lock_init(&sbi->iostat_lock); sbi->iostat_enable = false; + sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 
1: NR_TEMP_TYPE; @@ -3507,12 +3519,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } } + /* init per sbi slab cache */ + err = f2fs_init_xattr_caches(sbi); + if (err) + goto free_io_dummy; + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_io_dummy; + goto free_xattr_cache; } err = f2fs_get_valid_checkpoint(sbi); @@ -3639,7 +3656,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif - /* if there are nt orphan nodes free them */ + /* if there are any orphan inodes, free them */ err = f2fs_recover_orphan_inodes(sbi); if (err) goto free_meta; @@ -3648,7 +3665,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto reset_checkpoint; /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + if (!test_opt(sbi, DISABLE_ROLL_FORWARD) && + !test_opt(sbi, NORECOVERY)) { /* * mount should be failed, when device has readonly mode, and * previous checkpoint was not done by clean system shutdown. @@ -3703,7 +3721,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { + if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) @@ -3772,6 +3790,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); sbi->meta_inode = NULL; +free_xattr_cache: + f2fs_destroy_xattr_caches(sbi); free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: @@ -3781,7 +3801,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif free_options: #ifdef CONFIG_QUOTA diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 227d3db5c9896b993529a139dc95276fc7acdf96..70893a98c0e96772c51d3655c487f8b107437b3c 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -15,6 +15,7 @@ #include "f2fs.h" #include "segment.h" #include "gc.h" +#include static struct proc_dir_entry *f2fs_proc_root; @@ -109,47 +110,47 @@ static ssize_t features_show(struct f2fs_attr *a, return sprintf(buf, "0\n"); if (f2fs_sb_has_encrypt(sbi)) - len += snprintf(buf, PAGE_SIZE - len, "%s", + len += scnprintf(buf, PAGE_SIZE - len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? 
", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_compression(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "compression"); - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "pin_file"); - len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -175,16 +176,24 @@ static ssize_t encoding_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { #ifdef CONFIG_UNICODE + struct super_block *sb = sbi->sb; + if (f2fs_sb_has_casefold(sbi)) return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", - sbi->s_encoding->charset, - (sbi->s_encoding->version >> 16) & 0xff, - (sbi->s_encoding->version >> 8) & 0xff, - sbi->s_encoding->version & 0xff); + sb->s_encoding->charset, + (sb->s_encoding->version >> 16) & 0xff, + (sb->s_encoding->version >> 8) & 0xff, + sb->s_encoding->version & 0xff); #endif return sprintf(buf, "(none)"); } +static ssize_t mounted_time_sec_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time); +} + #ifdef CONFIG_F2FS_STAT_FS static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) @@ -233,16 +242,16 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); return len; } @@ -366,7 +375,6 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } - if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; if (!sbi->iostat_enable) @@ -374,6 +382,15 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return 
count; } + if (!strcmp(a->attr.name, "iostat_period_ms")) { + if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS) + return -EINVAL; + spin_lock(&sbi->iostat_lock); + sbi->iostat_period_ms = (unsigned int)t; + spin_unlock(&sbi->iostat_lock); + return count; + } + *ui = (unsigned int)t; return count; @@ -530,6 +547,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); @@ -537,6 +555,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); @@ -544,6 +563,7 @@ F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); +F2FS_GENERAL_RO_ATTR(mounted_time_sec); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); @@ -573,7 +593,9 @@ F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); #endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +#ifdef CONFIG_F2FS_FS_COMPRESSION F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); +#endif #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -606,6 +628,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle_interval), ATTR_LIST(umount_discard_timeout), ATTR_LIST(iostat_enable), + ATTR_LIST(iostat_period_ms), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), ATTR_LIST(extension_list), @@ -613,6 +636,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_rate), ATTR_LIST(inject_type), #endif + ATTR_LIST(data_io_flag), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), ATTR_LIST(unusable), @@ -621,6 +645,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), ATTR_LIST(encoding), + ATTR_LIST(mounted_time_sec), #ifdef CONFIG_F2FS_STAT_FS ATTR_LIST(cp_foreground_calls), ATTR_LIST(cp_background_calls), @@ -653,7 +678,9 @@ static struct attribute *f2fs_feat_attrs[] = { #endif ATTR_LIST(sb_checksum), ATTR_LIST(casefold), +#ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compression), +#endif NULL, }; @@ -736,6 +763,33 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, return 0; } +void f2fs_record_iostat(struct f2fs_sb_info *sbi) +{ + unsigned long long iostat_diff[NR_IO_TYPE]; + int i; + + if (time_is_after_jiffies(sbi->iostat_next_period)) + return; + + /* Need double check under the lock */ + spin_lock(&sbi->iostat_lock); + if (time_is_after_jiffies(sbi->iostat_next_period)) { + spin_unlock(&sbi->iostat_lock); + return; + } + sbi->iostat_next_period = jiffies + + msecs_to_jiffies(sbi->iostat_period_ms); + + for (i = 0; i < 
NR_IO_TYPE; i++) { + iostat_diff[i] = sbi->rw_iostat[i] - + sbi->prev_rw_iostat[i]; + sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; + } + spin_unlock(&sbi->iostat_lock); + + trace_f2fs_iostat(sbi, iostat_diff); +} + static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) { @@ -748,33 +802,51 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, seq_printf(seq, "time: %-16llu\n", now); - /* print app IOs */ + /* print app write IOs */ seq_printf(seq, "app buffered: %-16llu\n", - sbi->write_iostat[APP_BUFFERED_IO]); + sbi->rw_iostat[APP_BUFFERED_IO]); seq_printf(seq, "app direct: %-16llu\n", - sbi->write_iostat[APP_DIRECT_IO]); + sbi->rw_iostat[APP_DIRECT_IO]); seq_printf(seq, "app mapped: %-16llu\n", - sbi->write_iostat[APP_MAPPED_IO]); + sbi->rw_iostat[APP_MAPPED_IO]); - /* print fs IOs */ + /* print fs write IOs */ seq_printf(seq, "fs data: %-16llu\n", - sbi->write_iostat[FS_DATA_IO]); + sbi->rw_iostat[FS_DATA_IO]); seq_printf(seq, "fs node: %-16llu\n", - sbi->write_iostat[FS_NODE_IO]); + sbi->rw_iostat[FS_NODE_IO]); seq_printf(seq, "fs meta: %-16llu\n", - sbi->write_iostat[FS_META_IO]); + sbi->rw_iostat[FS_META_IO]); seq_printf(seq, "fs gc data: %-16llu\n", - sbi->write_iostat[FS_GC_DATA_IO]); + sbi->rw_iostat[FS_GC_DATA_IO]); seq_printf(seq, "fs gc node: %-16llu\n", - sbi->write_iostat[FS_GC_NODE_IO]); + sbi->rw_iostat[FS_GC_NODE_IO]); seq_printf(seq, "fs cp data: %-16llu\n", - sbi->write_iostat[FS_CP_DATA_IO]); + sbi->rw_iostat[FS_CP_DATA_IO]); seq_printf(seq, "fs cp node: %-16llu\n", - sbi->write_iostat[FS_CP_NODE_IO]); + sbi->rw_iostat[FS_CP_NODE_IO]); seq_printf(seq, "fs cp meta: %-16llu\n", - sbi->write_iostat[FS_CP_META_IO]); + sbi->rw_iostat[FS_CP_META_IO]); + + /* print app read IOs */ + seq_printf(seq, "app buffered: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_READ_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->rw_iostat[APP_DIRECT_READ_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_READ_IO]); + + /* print fs read IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->rw_iostat[FS_NODE_READ_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->rw_iostat[FS_META_READ_IO]); + + /* print other IOs */ seq_printf(seq, "fs discard: %-16llu\n", - sbi->write_iostat[FS_DISCARD]); + sbi->rw_iostat[FS_DISCARD]); return 0; } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index a401ef72bc82136b31dfdc8eca4cdcf5bc445dd1..5905050f7fb89a6084a4fd0da743da6600f6c26f 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -222,12 +222,57 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, return size; } +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. 
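+ *
+ * The tree pages live past EOF, so a Merkle-tree page maps to its
+ * page-cache index as below (sketch only; tree_index names the page's
+ * position within the tree, mirroring f2fs_read_merkle_tree_page()):
+ *
+ *	pgoff_t index = tree_index +
+ *		(f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT);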
+ */ +static void f2fs_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) +{ + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + static struct page *f2fs_read_merkle_tree_page(struct inode *inode, - pgoff_t index) + pgoff_t index, + unsigned long num_ra_pages) { + struct page *page; + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + f2fs_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 296b3189448a466c2177ace3d0423f1f31f2162b..4f6582ef7ee331b7b62c3c0a1c77e4c732f0d1a7 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -23,6 +23,25 @@ #include "xattr.h" #include "segment.h" +static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline) +{ + if (likely(size == sbi->inline_xattr_slab_size)) { + *is_inline = true; + return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS); + } + *is_inline = false; + return f2fs_kzalloc(sbi, size, GFP_NOFS); +} + +static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr, + bool is_inline) +{ + if (is_inline) + kmem_cache_free(sbi->inline_xattr_slab, xattr_addr); + else + kvfree(xattr_addr); +} + static int f2fs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) @@ -301,7 +320,8 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr, int *base_size) + void **base_addr, int *base_size, + bool *is_inline) { void *cur_addr, *txattr_addr, *last_txattr_addr; void *last_addr = NULL; @@ -312,12 +332,12 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!xnid && !inline_size) return -ENODATA; - *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); + *base_size = XATTR_SIZE(inode) + XATTR_PADDING_SIZE; + txattr_addr = xattr_alloc(F2FS_I_SB(inode), *base_size, is_inline); if (!txattr_addr) return -ENOMEM; - last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode); + last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(inode); /* read from inline xattr */ if (inline_size) { @@ -362,7 +382,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *base_addr = txattr_addr; return 0; out: - kvfree(txattr_addr); + xattr_free(F2FS_I_SB(inode), txattr_addr, *is_inline); return err; } @@ -499,6 +519,7 @@ int 
f2fs_getxattr(struct inode *inode, int index, const char *name, unsigned int size, len; void *base_addr = NULL; int base_size; + bool is_inline; if (name == NULL) return -EINVAL; @@ -509,7 +530,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr, &base_size); + &entry, &base_addr, &base_size, &is_inline); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -532,14 +553,13 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, } error = size; out: - kvfree(base_addr); + xattr_free(F2FS_I_SB(inode), base_addr, is_inline); return error; } ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = d_inode(dentry); - nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_xattr_entry *entry; void *base_addr, *last_base_addr; int error = 0; @@ -551,7 +571,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (error) return error; - last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + last_base_addr = (void *)base_addr + XATTR_SIZE(inode); list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = @@ -609,7 +629,6 @@ static int __f2fs_setxattr(struct inode *inode, int index, { struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; - nid_t xnid = F2FS_I(inode)->i_xattr_nid; int found, newsize; size_t len; __u32 new_hsize; @@ -633,7 +652,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (error) return error; - last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + last_base_addr = (void *)base_addr + XATTR_SIZE(inode); /* find entry with wanted name. 
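(The lookup below is bounded by last_base_addr, computed above from
XATTR_SIZE(inode), so a corrupted entry chain cannot be walked past the
end of the in-memory copy.)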
*/ here = __find_xattr(base_addr, last_base_addr, index, len, name); @@ -758,14 +777,34 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - /* protect xattr_ver */ - down_write(&F2FS_I(inode)->i_sem); down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); up_write(&F2FS_I(inode)->i_xattr_sem); - up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); f2fs_update_time(sbi, REQ_TIME); return err; } + +int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + char slab_name[32]; + + sprintf(slab_name, "f2fs_xattr_entry-%u:%u", MAJOR(dev), MINOR(dev)); + + sbi->inline_xattr_slab_size = F2FS_OPTION(sbi).inline_xattr_size * + sizeof(__le32) + XATTR_PADDING_SIZE; + + sbi->inline_xattr_slab = f2fs_kmem_cache_create(slab_name, + sbi->inline_xattr_slab_size); + if (!sbi->inline_xattr_slab) + return -ENOMEM; + + return 0; +} + +void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) +{ + kmem_cache_destroy(sbi->inline_xattr_slab); +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index de0c600b9cab0973cf2b8c5ba2b4c9e45da1214c..938fcd20565dccfdaa3a37a3cdd64abe4e1ab831 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -49,7 +49,7 @@ struct f2fs_xattr_entry { __u8 e_name_index; __u8 e_name_len; __le16 e_value_size; /* size of attribute value */ - char e_name[0]; /* attribute name */ + char e_name[]; /* attribute name */ }; #define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) @@ -73,7 +73,8 @@ struct f2fs_xattr_entry { entry = XATTR_NEXT_ENTRY(entry)) #define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) #define XATTR_PADDING_SIZE (sizeof(__u32)) -#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \ +#define XATTR_SIZE(i) ((F2FS_I(i)->i_xattr_nid ? \ + VALID_XATTR_BLOCK_SIZE : 0) + \ (inline_xattr_size(i))) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) @@ -130,6 +131,8 @@ extern int f2fs_setxattr(struct inode *, int, const char *, extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +extern int f2fs_init_xattr_caches(struct f2fs_sb_info *); +extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); #else #define f2fs_xattr_handlers NULL @@ -150,6 +153,8 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, { return -EOPNOTSUPP; } +static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 204d5ca813da3ef942ba481425f30d321078da51..154728d207c343462bde4f38665e52681af228e4 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -734,6 +734,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return NULL; init_rwsem(&ei->truncate_lock); + /* Zero these so iput() works even on a partially initialized inode. */ + ei->mmu_private = 0; + ei->i_start = 0; + ei->i_logstart = 0; + ei->i_attrs = 0; + ei->i_pos = 0; + return &ei->vfs_inode; } @@ -1364,16 +1371,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, return 0; } -static void fat_dummy_inode_init(struct inode *inode) -{ - /* Initialize this dummy inode to work as no-op. 
*/ - MSDOS_I(inode)->mmu_private = 0; - MSDOS_I(inode)->i_start = 0; - MSDOS_I(inode)->i_logstart = 0; - MSDOS_I(inode)->i_attrs = 0; - MSDOS_I(inode)->i_pos = 0; -} - static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1818,13 +1815,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; - fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/filesystems.c b/fs/filesystems.c index 931925d6aeed39fd2666ddb2cbe364e53963caf4..9829317d39c8da159bf2c9d371a290a7ebd7b753 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -298,7 +298,9 @@ struct file_system_type *get_fs_type(const char *name) fs = __get_fs_type(name, len); if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); - WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); + if (!fs) + pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n", + len, name); } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index aea1ed0aebd0f6a615b8118775583e7e8e21ad43..1e2ff4b32c79aea44981c0e8cc926b32640dea8f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -636,6 +636,9 @@ __acquires(&gl->gl_lockref.lock) goto out_unlock; if (nonblock) goto out_sched; + smp_mb(); + if (atomic_read(&gl->gl_revokes) != 0) + goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c850579ae5a46f3139c82324da85e01473f9b2d0..6c6401084d3d8f72c20b3e3c208234b8f0a895b5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1255,7 +1255,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!(*opened & FILE_OPENED)) return finish_no_open(file, d); dput(d); - return 0; + return excl && (flags & O_CREAT) ? 
-EEXIST : 0; } BUG_ON(d != NULL); diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index e6d554476db41cf61f1d850682f2aff0583e9122..eeebe80c6be4aa39878bee63521d2558d9a189da 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -292,6 +292,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, return -ENOENT; } + /* Avoid btree corruption */ + hfs_bnode_read(fd->bnode, fd->search_key, + fd->keyoffset, fd->keylength); + err = hfs_brec_remove(fd); if (err) return err; diff --git a/fs/inode.c b/fs/inode.c index c7e140686a348f6c813e59d82714a65e56f3110a..4e30a37ef712674d54e96370d3da5b1b7077c4b6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -135,6 +135,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; + atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &no_open_fops; diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index fe4fe155b7fbe9e2ca6bf923f17d387a6c4f8a25..15d129b7494b0e6861f5ef0242c0a18365a1aa1b 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) "journal space in %s\n", __func__, journal->j_devname); WARN_ON(1); - jbd2_journal_abort(journal, 0); + jbd2_journal_abort(journal, -EIO); } write_lock(&journal->j_state_lock); } else { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7dd61339259215927d047204390bebeeab5f52ab..6870103a0f59cfdcc6f649ba910fe1767e89f42c 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -783,7 +783,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } blk_finish_plug(&plug); @@ -876,7 +876,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } if (cbh) err = journal_wait_on_commit_record(journal, cbh); @@ -973,29 +973,34 @@ void jbd2_journal_commit_transaction(journal_t *journal) * it. */ /* - * A buffer which has been freed while still being journaled by - * a previous transaction. - */ - if (buffer_freed(bh)) { + * For a buffer which has been freed while still being journaled + * by a previous transaction, refile the buffer to BJ_Forget of + * the running transaction. If the just committed transaction + * contains "add to orphan" operation, we can completely + * invalidate the buffer now. We are rather thorough in that, + * since the buffer may still be accessible when blocksize < + * pagesize and it is attached to the last partial page. + */ + if (buffer_freed(bh) && !jh->b_next_transaction) { + struct address_space *mapping; + + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + /* - * If the running transaction is the one containing - * "add to orphan" operation (b_next_transaction != - * NULL), we have to wait for that transaction to - * commit before we can really get rid of the buffer. - * So just clear b_modified to not confuse transaction - * credit accounting and refile the buffer to - * BJ_Forget of the running transaction. If the just - * committed transaction contains "add to orphan" - * operation, we can completely invalidate the buffer - * now. 
We are rather through in that since the - * buffer may be still accessible when blocksize < - * pagesize and it is attached to the last partial - * page. + * Block device buffers need to stay mapped all the + * time, so it is enough to clear buffer_jbddirty and + * buffer_freed bits. For the file mapping buffers (i.e. + * journalled data) we need to unmap buffer and clear + * more bits. We also need to be careful about the check + * because the data page mapping can get cleared under + * our hands. Note that if mapping == NULL, we don't + * need to make buffer unmapped because the page is + * already detached from the mapping and buffers cannot + * get reused. */ - jh->b_modified = 0; - if (!jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + mapping = READ_ONCE(bh->b_page->mapping); + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_req(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index d3cce5c86fd905622b97d606bd311db82980d8b3..6e054b368b5feb2bf01509390bdaec1b42aa3eeb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1687,6 +1687,11 @@ int jbd2_journal_load(journal_t *journal) journal->j_devname); return -EFSCORRUPTED; } + /* + * clear JBD2_ABORT flag initialized in journal_init_common + * here to update log tail information with the newest seq. + */ + journal->j_flags &= ~JBD2_ABORT; /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory @@ -1694,7 +1699,6 @@ int jbd2_journal_load(journal_t *journal) if (journal_reset(journal)) goto recovery_error; - journal->j_flags &= ~JBD2_ABORT; journal->j_flags |= JBD2_LOADED; return 0; @@ -2115,8 +2119,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) if (journal->j_flags & JBD2_ABORT) { write_unlock(&journal->j_state_lock); - if (!old_errno && old_errno != -ESHUTDOWN && - errno == -ESHUTDOWN) + if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) jbd2_journal_update_sb_errno(journal); return; } @@ -2124,12 +2127,10 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) { - jbd2_journal_update_sb_errno(journal); - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); - } + jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); } /** @@ -2171,11 +2172,6 @@ static void __journal_abort_soft (journal_t *journal, int errno) * failure to disk. ext3_error, for example, now uses this * functionality. * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). 
- * */ void jbd2_journal_abort(journal_t *journal, int errno) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7fe422eced89ba3c3dc1ea818566083b918708d5..a355ca418e788b8655897f3b51246b9c182ee9cf 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1050,8 +1050,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -2231,14 +2231,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, return -EBUSY; } /* - * OK, buffer won't be reachable after truncate. We just set - * j_next_transaction to the running transaction (if there is - * one) and mark buffer as freed so that commit code knows it - * should clear dirty bits when it is done with the buffer. + * OK, buffer won't be reachable after truncate. We just clear + * b_modified to not confuse transaction credit accounting, and + * set j_next_transaction to the running transaction (if there + * is one) and mark buffer as freed so that commit code knows + * it should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) jh->b_next_transaction = journal->j_running_transaction; + jh->b_modified = 0; jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -2464,8 +2466,8 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. 
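 *
 * The WRITE_ONCE() stores below pair with the lockless READ_ONCE()
 * readers in jbd2_write_access_granted() (converted earlier in this
 * patch), so a racing reader cannot observe a torn update of
 * b_transaction or b_next_transaction.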
*/ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) diff --git a/fs/libfs.c b/fs/libfs.c index 49623301e5f0b9a3d7756629b71dd7aa4746990b..f66eb521d4f861a0e7644811172c3146b1ae768e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,8 @@ #include #include #include /* sync_mapping_buffers */ +#include +#include #include @@ -802,7 +804,7 @@ int simple_attr_open(struct inode *inode, struct file *file, { struct simple_attr *attr; - attr = kmalloc(sizeof(*attr), GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -842,9 +844,11 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, if (ret) return ret; - if (*ppos) { /* continued read */ + if (*ppos && attr->get_buf[0]) { + /* continued read */ size = strlen(attr->get_buf); - } else { /* first read */ + } else { + /* first read */ u64 val; ret = attr->get(attr->data, &val); if (ret) @@ -1219,3 +1223,62 @@ bool is_empty_dir_inode(struct inode *inode) return (inode->i_fop == &empty_dir_operations) && (inode->i_op == &empty_dir_inode_operations); } + +#ifdef CONFIG_UNICODE +bool needs_casefold(const struct inode *dir) +{ + return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding && + (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)); +} +EXPORT_SYMBOL(needs_casefold); + +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); + const struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + struct qstr entry = QSTR_INIT(str, len); + int ret; + + if (!inode || !needs_casefold(inode)) + goto fallback; + + ret = utf8_strncasecmp(um, name, &entry); + if (ret >= 0) + return ret; + + if (sb_has_enc_strict_mode(sb)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); +} +EXPORT_SYMBOL(generic_ci_d_compare); + +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) +{ + const struct inode *inode = READ_ONCE(dentry->d_inode); + struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + int ret = 0; + + if (!inode || !needs_casefold(inode)) + return 0; + + ret = utf8_casefold_hash(um, dentry, str); + if (ret < 0) + goto err; + + return 0; +err: + if (sb_has_enc_strict_mode(sb)) + ret = -EINVAL; + else + ret = 0; + return ret; +} +EXPORT_SYMBOL(generic_ci_d_hash); +#endif diff --git a/fs/namei.c b/fs/namei.c index 7c876d8734ea9abfc7662e9ef61aadc755b8468a..6c933d1cc941a9559949a6883a06ff1bf9db3e9a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1481,7 +1481,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->path.dentry = parent; nd->seq = seq; if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + return -ECHILD; break; } else { struct mount *mnt = real_mount(nd->path.mnt); diff --git a/fs/namespace.c b/fs/namespace.c index 68549a0d10547d3fda0b5ea5860c6280c4064b3f..53db6cdba8eae3f1206dbc348bd621852c9f8c60 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3275,8 +3275,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* make certain new is below the root */ if (!is_path_reachable(new_mnt, new.dentry, &root)) goto out4; - root_mp->m_count++; /* pin it so it won't go away */ 
lock_mount_hash(); + root_mp->m_count++; /* pin it so it won't go away */ detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b8d55da2f04d5299c84ab640b44dc4c21de0bbf7..440ff8e7082b6321e523096f279ec7bf04e0b6a2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -127,6 +127,8 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!pnfs_layout_is_valid(lo)) + continue; if (stateid != NULL && !nfs4_stateid_match_other(stateid, &lo->plh_stateid)) continue; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9d07b53e1647b537d0d7e977b7339a0594c19c88..e6ea4511c41ceba0d711458666346b445c878c8d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -600,6 +600,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { result = PTR_ERR(l_ctx); + nfs_direct_req_release(dreq); goto out_release; } dreq->l_ctx = l_ctx; @@ -1023,6 +1024,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { result = PTR_ERR(l_ctx); + nfs_direct_req_release(dreq); goto out_release; } dreq->l_ctx = l_ctx; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7173a4ee862cb87af4cbab0314032c412a6bf3e8..5e9f9c70fe701590983e3e2e77887c48a93983b7 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -253,37 +253,45 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct posix_acl *alloc = NULL, *dfacl = NULL; + struct posix_acl *orig = acl, *dfacl = NULL, *alloc; int status; if (S_ISDIR(inode->i_mode)) { switch(type) { case ACL_TYPE_ACCESS: - alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT); + alloc = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(alloc)) goto fail; + dfacl = alloc; break; case ACL_TYPE_DEFAULT: - dfacl = acl; - alloc = acl = get_acl(inode, ACL_TYPE_ACCESS); + alloc = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(alloc)) goto fail; + dfacl = acl; + acl = alloc; break; } } if (acl == NULL) { - alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(alloc)) goto fail; + acl = alloc; } status = __nfs3_proc_setacls(inode, acl, dfacl); - posix_acl_release(alloc); +out: + if (acl != orig) + posix_acl_release(acl); + if (dfacl != orig) + posix_acl_release(dfacl); return status; fail: - return PTR_ERR(alloc); + status = PTR_ERR(alloc); + goto out; } const struct xattr_handler *nfs3_xattr_handlers[] = { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ceb6892d9bbdcb3b332a499bb06fedb8e04e9f9f..7c01936be7c706f50e0c9bdae61aa815d24e7b8c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -864,15 +864,6 @@ static void nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, pgio->pg_mirror_count = mirror_count; } -/* - * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) - */ -void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) -{ - pgio->pg_mirror_count = 1; - pgio->pg_mirror_idx = 0; -} - static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) { pgio->pg_mirror_count = 1; @@ -1301,6 +1292,14 @@ void 
nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) } } +/* + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) + */ +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) +{ + nfs_pageio_complete(pgio); +} + int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 89f36040adf6222cbb26bf481b6235b82bde73f6..7b6bda68aa86a3b5eca6574a484788c2becab445 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -422,6 +422,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, } subreq->wb_head = subreq; + nfs_release_request(old_head); if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { nfs_release_request(subreq); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fca8b2e7fbeb7b2e327e1f6264fc3169c9a7327e..d5d1c70bb927b6d828b6ff0787fba4f2a8d09902 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -246,6 +246,8 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, if (!nbl) { nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); if (nbl) { + INIT_LIST_HEAD(&nbl->nbl_list); + INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index addd7c5f2d3e522a7a65bd51b0164ef705402605..bed54e8adcf99683fbd665565babe39633090c3f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7240,6 +7240,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inline_data *idata = &di->id2.i_data; + /* No need to punch hole beyond i_size. */ + if (start >= i_size_read(inode)) + return 0; + if (end > i_size_read(inode)) end = i_size_read(inode); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 497a4171ef61f6209a32303530b79c72439962cf..bfb50fc51528ffbf9d7c20c7b8b9872475991029 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, { struct ocfs2_inode_info *oi = OCFS2_I(inode); - oi->i_sync_tid = handle->h_transaction->t_tid; - if (datasync) - oi->i_datasync_tid = handle->h_transaction->t_tid; + if (!is_handle_aborted(handle)) { + oi->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + oi->i_datasync_tid = handle->h_transaction->t_tid; + } } #endif /* OCFS2_JOURNAL_H */ diff --git a/fs/open.c b/fs/open.c index c7776aa098bc76dc2d9c68df1213363c3af024e3..0628bd1ae2ad69356c706122d19e8bd263bebb99 100644 --- a/fs/open.c +++ b/fs/open.c @@ -838,9 +838,6 @@ static int do_dentry_open(struct file *f, * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * - * On successful return @file is a fully instantiated open file. After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - * * Returns zero on success or -errno if the open failed. 
*/ int finish_open(struct file *file, struct dentry *dentry, diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1c59dff530dee5d10eadcb671798367ee40e173b..34d1cc98260d23dd8d3df5d9f87613be06766c7c 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -305,6 +305,7 @@ static void *help_start(struct seq_file *m, loff_t *pos) static void *help_next(struct seq_file *m, void *v, loff_t *pos) { + (*pos)++; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); return NULL; diff --git a/fs/pnode.c b/fs/pnode.c index 681916df422c777a8743608623258d1ffecbe501..1fc2e47d130827a3897d5309abcfbbd79ef77b34 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -266,14 +266,13 @@ static int propagate_one(struct mount *m) if (IS_ERR(child)) return PTR_ERR(child); child->mnt.mnt_flags &= ~MNT_LOCKED; + read_seqlock_excl(&mount_lock); mnt_set_mountpoint(m, mp, child); + if (m->mnt_master != dest_master) + SET_MNT_MARK(m->mnt_master); + read_sequnlock_excl(&mount_lock); last_dest = m; last_source = child; - if (m->mnt_master != dest_master) { - read_seqlock_excl(&mount_lock); - SET_MNT_MARK(m->mnt_master); - read_sequnlock_excl(&mount_lock); - } hlist_add_head(&child->mnt_hash, list); return count_mounts(m->mnt_ns, child); } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index ce400f97370d3fd509f46103bedf98abb536feaf..aaa7486b6f0d9e42a850c9f154d3dcfdceb34e95 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -459,7 +459,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); kaddr = elfnotes_buf + start - elfcorebuf_sz; if (remap_vmalloc_range_partial(vma, vma->vm_start + len, - kaddr, tsz)) + kaddr, 0, tsz)) goto fail; size -= tsz; start += tsz; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 0037aea97d39a6c73eff9755d805bc2312b552a3..2946713cb00d6cfe327dd6e86ac3b452d6cc34fb 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -2250,7 +2250,8 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, /* also releases the path */ unfix_nodes(&s_ins_balance); #ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + if (inode) + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5208d85dd30c55f711b7892601541e401c931ad3..9caf3948417c0c6e5b073c4da6e005d5eb70ad55 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1954,7 +1954,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); - goto error; + goto error_unlocked; } } #ifdef CONFIG_QUOTA diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4dd681e0d59d948374883a7e34ed8318d4537e23..edeca118cce5008ccec9d35ccae19513c62bcc36 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -87,6 +87,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, lower_dentry_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); + if (d_is_positive(lower_dentry)) + return -EEXIST; + /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index a9e28ae8091f7387c070434a2f34125a1c03ee8b..886aee2799203a948190d61a6c01db5d0da3fe37 100644 --- a/fs/sdcardfs/lookup.c +++ 
b/fs/sdcardfs/lookup.c @@ -257,7 +257,6 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, struct dentry *lower_dentry; const struct qstr *name; struct path lower_path; - struct qstr dname; struct dentry *ret_dentry = NULL; struct sdcardfs_sb_info *sbi; @@ -316,6 +315,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, /* no error: handle positive dentries */ if (!err) { +found: /* check if the dentry is an obb dentry * if true, the lower_inode must be replaced with * the inode of the graft path @@ -362,23 +362,26 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, if (err && err != -ENOENT) goto out; - /* instatiate a new negative dentry */ - dname.name = name->name; - dname.len = name->len; - - /* See if the low-level filesystem might want - * to use its own hash - */ - lower_dentry = lookup_one_len_unlocked(dname.name, lower_dir_dentry, - dname.len); - if (IS_ERR(lower_dentry)) - return lower_dentry; - - if (d_really_is_negative(lower_dentry)) - err = -ENOENT; + /* get a (very likely) new negative dentry */ + lower_dentry = lookup_one_len_unlocked(name->name, + lower_dir_dentry, name->len); + if (IS_ERR(lower_dentry)) { + err = PTR_ERR(lower_dentry); + goto out; + } lower_path.dentry = lower_dentry; lower_path.mnt = mntget(lower_dir_mnt); + + /* + * Check if someone sneakily filled in the dentry when + * we weren't looking. We'll check again in create. + */ + if (unlikely(d_inode_rcu(lower_dentry))) { + err = 0; + goto found; + } + sdcardfs_set_lower_path(dentry, &lower_path); /* diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 3b17cc4f18fab949247482d82f4a6673191e09e5..26ac11d0eb4b357efda41a4b441781dd124246cc 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -237,9 +237,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out_fname; } - if (nm.hash) { - ubifs_assert(fname_len(&nm) == 0); - ubifs_assert(fname_name(&nm) == NULL); + if (fname_name(&nm) == NULL) { if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 69051f7a960667502894a8ba3cd357a21be7e1ac..99d31a64f2a394786725996f217310b11536178f 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -583,7 +583,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (!xent) { dent->ch.node_type = UBIFS_DENT_NODE; - if (nm->hash) + if (fname_name(nm) == NULL) dent_key_init_hash(c, &dent_key, dir->i_ino, nm->hash); else dent_key_init(c, &dent_key, dir->i_ino, nm); @@ -630,7 +630,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, kfree(dent); if (deletion) { - if (nm->hash) + if (fname_name(nm) == NULL) err = ubifs_tnc_remove_dh(c, &dent_key, nm->minor_hash); else err = ubifs_tnc_remove_nm(c, &dent_key, nm); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index b1f7c0caa3acb60f53409d306da192b21b253e49..7547be512db2632153ce20822dd0063dcdc5cddd 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -162,7 +162,6 @@ static inline void dent_key_init(const struct ubifs_info *c, uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm)); ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); - ubifs_assert(!nm->hash && !nm->minor_hash); key->u32[0] = inum; key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 242d960df9a17a2171a3824b2dea6405a05dec81..51de27685e185efd6539b8be69e05ae642359b53 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2467,17 
+2467,29 @@ static unsigned int udf_count_free_table(struct super_block *sb, static unsigned int udf_count_free(struct super_block *sb) { unsigned int accum = 0; - struct udf_sb_info *sbi; + struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; + unsigned int part = sbi->s_partition; + int ptype = sbi->s_partmaps[part].s_partition_type; + + if (ptype == UDF_METADATA_MAP25) { + part = sbi->s_partmaps[part].s_type_specific.s_metadata. + s_phys_partition_ref; + } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) { + /* + * Filesystems with VAT are append-only and we cannot write to + * them. Let's just report 0 here. + */ + return 0; + } - sbi = UDF_SB(sb); if (sbi->s_lvid_bh) { struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *) sbi->s_lvid_bh->b_data; - if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) { + if (le32_to_cpu(lvid->numOfPartitions) > part) { accum = le32_to_cpu( - lvid->freeSpaceTable[sbi->s_partition]); + lvid->freeSpaceTable[part]); if (accum == 0xFFFFFFFF) accum = 0; } @@ -2486,7 +2498,7 @@ static unsigned int udf_count_free(struct super_block *sb) if (accum) return accum; - map = &sbi->s_partmaps[sbi->s_partition]; + map = &sbi->s_partmaps[part]; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { accum += udf_count_free_bitmap(sb, map->s_uspace.s_bitmap); diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 71ca4d047d6543701dbbdcc2056348534a25e96f..d18789f276503136ea33ba25fedceeea796e9463 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "utf8n.h" @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, } return -EINVAL; } - EXPORT_SYMBOL(utf8_casefold); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur; + int c; + unsigned long hash = init_name_hash(salt); + + if (utf8ncursor(&cur, data, str->name, str->len) < 0) + return -EINVAL; + + while ((c = utf8byte(&cur))) { + if (c < 0) + return c; + hash = partial_name_hash((unsigned char)c, hash); + } + str->hash = end_name_hash(hash); + return 0; +} +EXPORT_SYMBOL(utf8_casefold_hash); + int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/fs/verity/enable.c b/fs/verity/enable.c index eabc6ac1990641fc6b4c54108d32328a9bee95c6..15e7d14ec2ffc7e09b77dff8b235b351baa2f3f5 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,18 +8,48 @@ #include "fsverity_private.h" #include +#include #include #include #include #include -static int build_merkle_tree_level(struct inode *inode, unsigned int level, +/* + * Read a file data page for Merkle tree construction. Do aggressive readahead, + * since we're sequentially reading the entire file. 
+ */ +static struct page *read_file_data_page(struct file *filp, pgoff_t index, + struct file_ra_state *ra, + unsigned long remaining_pages) +{ + struct page *page; + + page = find_get_page_flags(filp->f_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else + page_cache_sync_readahead(filp->f_mapping, ra, filp, + index, remaining_pages); + page = read_mapping_page(filp->f_mapping, index, NULL); + if (IS_ERR(page)) + return page; + } + if (PageReadahead(page)) + page_cache_async_readahead(filp->f_mapping, ra, filp, page, + index, remaining_pages); + return page; +} + +static int build_merkle_tree_level(struct file *filp, unsigned int level, u64 num_blocks_to_hash, const struct merkle_tree_params *params, u8 *pending_hashes, struct ahash_request *req) { + struct inode *inode = file_inode(filp); const struct fsverity_operations *vops = inode->i_sb->s_vop; + struct file_ra_state ra = { 0 }; unsigned int pending_size = 0; u64 dst_block_num; u64 i; @@ -36,6 +66,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, dst_block_num = 0; /* unused */ } + file_ra_state_init(&ra, filp->f_mapping); + for (i = 0; i < num_blocks_to_hash; i++) { struct page *src_page; @@ -45,7 +77,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, if (level == 0) { /* Leaf: hashing a data block */ - src_page = read_mapping_page(inode->i_mapping, i, NULL); + src_page = read_file_data_page(filp, i, &ra, + num_blocks_to_hash - i); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, @@ -54,9 +87,14 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, return err; } } else { + unsigned long num_ra_pages = + min_t(unsigned long, num_blocks_to_hash - i, + inode->i_sb->s_bdi->io_pages); + /* Non-leaf: hashing hash block from level below */ src_page = vops->read_merkle_tree_page(inode, - params->level_start[level - 1] + i); + params->level_start[level - 1] + i, + num_ra_pages); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, @@ -103,17 +141,18 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, } /* - * Build the Merkle tree for the given inode using the given parameters, and + * Build the Merkle tree for the given file using the given parameters, and * return the root hash in @root_hash. * * The tree is written to a filesystem-specific location as determined by the * ->write_merkle_tree_block() method. However, the blocks that comprise the * tree are the same for all filesystems. */ -static int build_merkle_tree(struct inode *inode, +static int build_merkle_tree(struct file *filp, const struct merkle_tree_params *params, u8 *root_hash) { + struct inode *inode = file_inode(filp); u8 *pending_hashes; struct ahash_request *req; u64 blocks; @@ -126,9 +165,11 @@ static int build_merkle_tree(struct inode *inode, return 0; } + /* This allocation never fails, since it's mempool-backed. 
*/ + req = fsverity_alloc_hash_request(params->hash_alg, GFP_KERNEL); + pending_hashes = kmalloc(params->block_size, GFP_KERNEL); - req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL); - if (!pending_hashes || !req) + if (!pending_hashes) goto out; /* @@ -139,7 +180,7 @@ static int build_merkle_tree(struct inode *inode, blocks = (inode->i_size + params->block_size - 1) >> params->log_blocksize; for (level = 0; level <= params->num_levels; level++) { - err = build_merkle_tree_level(inode, level, blocks, params, + err = build_merkle_tree_level(filp, level, blocks, params, pending_hashes, req); if (err) goto out; @@ -150,7 +191,7 @@ static int build_merkle_tree(struct inode *inode, err = 0; out: kfree(pending_hashes); - ahash_request_free(req); + fsverity_free_hash_request(params->hash_alg, req); return err; } @@ -175,8 +216,7 @@ static int enable_verity(struct file *filp, /* Get the salt if the user provided one */ if (arg->salt_size && - copy_from_user(desc->salt, - (const u8 __user *)(uintptr_t)arg->salt_ptr, + copy_from_user(desc->salt, u64_to_user_ptr(arg->salt_ptr), arg->salt_size)) { err = -EFAULT; goto out; @@ -185,8 +225,7 @@ static int enable_verity(struct file *filp, /* Get the signature if the user provided one */ if (arg->sig_size && - copy_from_user(desc->signature, - (const u8 __user *)(uintptr_t)arg->sig_ptr, + copy_from_user(desc->signature, u64_to_user_ptr(arg->sig_ptr), arg->sig_size)) { err = -EFAULT; goto out; @@ -227,7 +266,7 @@ static int enable_verity(struct file *filp, */ pr_debug("Building Merkle tree...\n"); BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE); - err = build_merkle_tree(inode, ¶ms, desc->root_hash); + err = build_merkle_tree(filp, ¶ms, desc->root_hash); if (err) { fsverity_err(inode, "Error %d building Merkle tree", err); goto rollback; diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index e74c79b64d8898e4778cfe704ae536c77e5daa6b..4b2c8aed0563103ad04f8870a8db7e95f13a51cc 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -16,6 +16,7 @@ #include #include +#include struct ahash_request; @@ -37,11 +38,12 @@ struct fsverity_hash_alg { const char *name; /* crypto API name, e.g. sha256 */ unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ unsigned int block_size; /* block size in bytes, e.g. 
64 for SHA-256 */ + mempool_t *req_pool; /* mempool with a preallocated hash request */ }; /* Merkle tree parameters: hash algorithm, initial hash state, and topology */ struct merkle_tree_params { - const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ + struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ const u8 *hashstate; /* initial hash state or NULL */ unsigned int digest_size; /* same as hash_alg->digest_size */ unsigned int block_size; /* size of data and tree blocks */ @@ -50,6 +52,7 @@ struct merkle_tree_params { unsigned int log_arity; /* log2(hashes_per_block) */ unsigned int num_levels; /* number of levels in Merkle tree */ u64 tree_size; /* Merkle tree size in bytes */ + unsigned long level0_blocks; /* number of blocks in tree level 0 */ /* * Starting block index for each tree level, ordered from leaf level (0) @@ -114,14 +117,18 @@ struct fsverity_signed_digest { extern struct fsverity_hash_alg fsverity_hash_algs[]; -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num); -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num); +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags); +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req); +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size); int fsverity_hash_page(const struct merkle_tree_params *params, const struct inode *inode, struct ahash_request *req, struct page *page, u8 *out); -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out); void __init fsverity_check_hash_algs(void); diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c index 31e6d7d2389ab00a1a7d8a4b6bc4c3af03e21a85..6682e4e6b601fdf84002bee63a119d5b67b42ab7 100644 --- a/fs/verity/hash_algs.c +++ b/fs/verity/hash_algs.c @@ -24,6 +24,8 @@ struct fsverity_hash_alg fsverity_hash_algs[] = { }, }; +static DEFINE_MUTEX(fsverity_hash_alg_init_mutex); + /** * fsverity_get_hash_alg() - validate and prepare a hash algorithm * @inode: optional inode for logging purposes @@ -36,8 +38,8 @@ struct fsverity_hash_alg fsverity_hash_algs[] = { * * Return: pointer to the hash alg on success, else an ERR_PTR() */ -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num) +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num) { struct fsverity_hash_alg *alg; struct crypto_ahash *tfm; @@ -50,10 +52,15 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, } alg = &fsverity_hash_algs[num]; - /* pairs with cmpxchg() below */ - tfm = READ_ONCE(alg->tfm); - if (likely(tfm != NULL)) + /* pairs with smp_store_release() below */ + if (likely(smp_load_acquire(&alg->tfm) != NULL)) return alg; + + mutex_lock(&fsverity_hash_alg_init_mutex); + + if (alg->tfm != NULL) + goto out_unlock; + /* * Using the shash API would make things a bit simpler, but the ahash * API is preferable as it allows the use of crypto accelerators. 
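
The hash_algs.c hunks on either side of this point replace the lock-free cmpxchg() publication of alg->tfm with a mutex-serialized slow path (keeping only the reader side lockless) and add a one-element mempool for hash requests. A minimal sketch of the publication pattern being adopted, using illustrative names (demo_get_tfm(), cached_tfm) rather than the kernel's exact code:

	static struct crypto_ahash *cached_tfm;	/* written once, then read-only */
	static DEFINE_MUTEX(demo_init_mutex);

	static struct crypto_ahash *demo_get_tfm(void)
	{
		/* lockless fast path: pairs with smp_store_release() below */
		struct crypto_ahash *tfm = smp_load_acquire(&cached_tfm);

		if (tfm)
			return tfm;

		mutex_lock(&demo_init_mutex);
		tfm = cached_tfm;		/* re-check under the mutex */
		if (!tfm) {
			tfm = crypto_alloc_ahash("sha256", 0, 0);
			if (!IS_ERR(tfm))
				/* publish only a fully initialized object */
				smp_store_release(&cached_tfm, tfm);
		}
		mutex_unlock(&demo_init_mutex);
		return tfm;	/* may be an ERR_PTR() if allocation failed */
	}

The acquire/release pair guarantees that a reader who observes a non-NULL pointer also observes every store that initialized it, while the mutex keeps two CPUs from racing to set up both the tfm and the new req_pool -- something the old single-pointer cmpxchg() scheme could not cover cleanly for two objects.
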
@@ -64,12 +71,14 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, fsverity_warn(inode, "Missing crypto API support for hash algorithm \"%s\"", alg->name); - return ERR_PTR(-ENOPKG); + alg = ERR_PTR(-ENOPKG); + goto out_unlock; } fsverity_err(inode, "Error allocating hash algorithm \"%s\": %ld", alg->name, PTR_ERR(tfm)); - return ERR_CAST(tfm); + alg = ERR_CAST(tfm); + goto out_unlock; } err = -EINVAL; @@ -78,18 +87,63 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm))) goto err_free_tfm; + alg->req_pool = mempool_create_kmalloc_pool(1, + sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm)); + if (!alg->req_pool) { + err = -ENOMEM; + goto err_free_tfm; + } + pr_info("%s using implementation \"%s\"\n", alg->name, crypto_ahash_driver_name(tfm)); - /* pairs with READ_ONCE() above */ - if (cmpxchg(&alg->tfm, NULL, tfm) != NULL) - crypto_free_ahash(tfm); - - return alg; + /* pairs with smp_load_acquire() above */ + smp_store_release(&alg->tfm, tfm); + goto out_unlock; err_free_tfm: crypto_free_ahash(tfm); - return ERR_PTR(err); + alg = ERR_PTR(err); +out_unlock: + mutex_unlock(&fsverity_hash_alg_init_mutex); + return alg; +} + +/** + * fsverity_alloc_hash_request() - allocate a hash request object + * @alg: the hash algorithm for which to allocate the request + * @gfp_flags: memory allocation flags + * + * This is mempool-backed, so this never fails if __GFP_DIRECT_RECLAIM is set in + * @gfp_flags. However, in that case this might need to wait for all + * previously-allocated requests to be freed. So to avoid deadlocks, callers + * must never need multiple requests at a time to make forward progress. + * + * Return: the request object on success; NULL on failure (but see above) + */ +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags) +{ + struct ahash_request *req = mempool_alloc(alg->req_pool, gfp_flags); + + if (req) + ahash_request_set_tfm(req, alg->tfm); + return req; +} + +/** + * fsverity_free_hash_request() - free a hash request object + * @alg: the hash algorithm + * @req: the hash request object to free + */ +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req) +{ + if (req) { + ahash_request_zero(req); + mempool_free(req, alg->req_pool); + } } /** @@ -101,7 +155,7 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed * initial hash state on success or an ERR_PTR() on failure. */ -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size) { u8 *hashstate = NULL; @@ -119,11 +173,8 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (!hashstate) return ERR_PTR(-ENOMEM); - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) { - err = -ENOMEM; - goto err_free; - } + /* This allocation never fails, since it's mempool-backed. 
*/ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); /* * Zero-pad the salt to the next multiple of the input size of the hash @@ -158,7 +209,7 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (err) goto err_free; out: - ahash_request_free(req); + fsverity_free_hash_request(alg, req); kfree(padded_salt); return hashstate; @@ -229,7 +280,7 @@ int fsverity_hash_page(const struct merkle_tree_params *params, * * Return: 0 on success, -errno on failure */ -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out) { struct ahash_request *req; @@ -237,9 +288,8 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, DECLARE_CRYPTO_WAIT(wait); int err; - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) - return -ENOMEM; + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); sg_init_one(&sg, data, size); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | @@ -249,7 +299,7 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, err = crypto_wait_req(crypto_ahash_digest(req), &wait); - ahash_request_free(req); + fsverity_free_hash_request(alg, req); return err; } diff --git a/fs/verity/open.c b/fs/verity/open.c index 4cdd75acbc97c9666929c7289fa8e3ff0c76af6e..25b29065d897cb558105ec725211eb46a39a342d 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -31,7 +31,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, unsigned int log_blocksize, const u8 *salt, size_t salt_size) { - const struct fsverity_hash_alg *hash_alg; + struct fsverity_hash_alg *hash_alg; int err; u64 blocks; u64 offset; @@ -102,6 +102,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, /* temporarily using level_start[] to store blocks in level */ params->level_start[params->num_levels++] = blocks; } + params->level0_blocks = params->level_start[0]; /* Compute the starting block of each level */ offset = 0; @@ -126,7 +127,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, * Compute the file measurement by hashing the fsverity_descriptor excluding the * signature and with the sig_size field set to 0. */ -static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg, +static int compute_file_measurement(struct fsverity_hash_alg *hash_alg, struct fsverity_descriptor *desc, u8 *measurement) { diff --git a/fs/verity/verify.c b/fs/verity/verify.c index cf09852e5227f2205d966cd92d4b2c1bab82020e..5324270cd7d474f1506c481a9b613568e722717b 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -84,7 +84,8 @@ static inline int cmp_hashes(const struct fsverity_info *vi, * Return: true if the page is valid, else false. */ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, - struct ahash_request *req, struct page *data_page) + struct ahash_request *req, struct page *data_page, + unsigned long level0_ra_pages) { const struct merkle_tree_params *params = &vi->tree_params; const unsigned int hsize = params->digest_size; @@ -117,8 +118,8 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n", level, hindex, hoffset); - hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, - hindex); + hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, hindex, + level == 0 ? 
level0_ra_pages : 0);
 		if (IS_ERR(hpage)) {
 			err = PTR_ERR(hpage);
 			fsverity_err(inode,
@@ -191,13 +192,12 @@ bool fsverity_verify_page(struct page *page)
 	struct ahash_request *req;
 	bool valid;
 
-	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
-	if (unlikely(!req))
-		return false;
+	/* This allocation never fails, since it's mempool-backed. */
+	req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);
 
-	valid = verify_page(inode, vi, req, page);
+	valid = verify_page(inode, vi, req, page, 0);
 
-	ahash_request_free(req);
+	fsverity_free_hash_request(vi->tree_params.hash_alg, req);
 
 	return valid;
 }
@@ -222,25 +222,42 @@ void fsverity_verify_bio(struct bio *bio)
 {
 	struct inode *inode = bio->bi_io_vec->bv_page->mapping->host;
 	const struct fsverity_info *vi = inode->i_verity_info;
+	const struct merkle_tree_params *params = &vi->tree_params;
 	struct ahash_request *req;
 	struct bio_vec *bv;
 	int i;
-
-	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
-	if (unlikely(!req)) {
+	unsigned long max_ra_pages = 0;
+
+	/* This allocation never fails, since it's mempool-backed. */
+	req = fsverity_alloc_hash_request(params->hash_alg, GFP_NOFS);
+
+	if (bio->bi_opf & REQ_RAHEAD) {
+		/*
+		 * If this bio is for data readahead, then we also do readahead
+		 * of the first (largest) level of the Merkle tree. Namely,
+		 * when a Merkle tree page is read, we also try to piggy-back on
+		 * some additional pages -- up to 1/4 the number of data pages.
+		 *
+		 * This improves sequential read performance, as it greatly
+		 * reduces the number of I/O requests made to the Merkle tree.
+		 */
 		bio_for_each_segment_all(bv, bio, i)
-			SetPageError(bv->bv_page);
-		return;
+			max_ra_pages++;
+		max_ra_pages /= 4;
 	}
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
+		unsigned long level0_index = page->index >> params->log_arity;
+		unsigned long level0_ra_pages =
+			min(max_ra_pages, params->level0_blocks - level0_index);
 
-		if (!PageError(page) && !verify_page(inode, vi, req, page))
+		if (!PageError(page) &&
+		    !verify_page(inode, vi, req, page, level0_ra_pages))
 			SetPageError(page);
 	}
 
-	ahash_request_free(req);
+	fsverity_free_hash_request(params->hash_alg, req);
 }
 EXPORT_SYMBOL_GPL(fsverity_verify_bio);
 #endif /* CONFIG_BLOCK */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index cb4833d0646787d03e746d8bd7ec5ea3dbc814af..7cfbe2b0f8867ba060ee9c1f5dd2eb90f6f73d8b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3035,7 +3035,8 @@ xfs_rename(
 				&dfops, &first_block, spaceres);
 
 	/*
-	 * Set up the target.
+	 * Check for expected errors before we dirty the transaction
+	 * so we can return an error without a transaction abort.
 	 */
 	if (target_ip == NULL) {
 		/*
@@ -3047,6 +3048,46 @@ xfs_rename(
 			if (error)
 				goto out_trans_cancel;
 		}
+	} else {
+		/*
+		 * If target exists and it's a directory, check that whether
+		 * it can be destroyed.
+		 */
+		if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
+		    (!xfs_dir_isempty(target_ip) ||
+		     (VFS_I(target_ip)->i_nlink > 2))) {
+			error = -EEXIST;
+			goto out_trans_cancel;
+		}
+	}
+
+	/*
+	 * Directory entry creation below may acquire the AGF. Remove
+	 * the whiteout from the unlinked list first to preserve correct
+	 * AGI/AGF locking order. This dirties the transaction so failures
+	 * after this point will abort and log recovery will clean up the
+	 * mess.
+	 *
+	 * For whiteouts, we need to bump the link count on the whiteout
+	 * inode.
After this point, we have a real link, clear the tmpfile + * state flag from the inode so it doesn't accidentally get misused + * in future. + */ + if (wip) { + ASSERT(VFS_I(wip)->i_nlink == 0); + error = xfs_iunlink_remove(tp, wip); + if (error) + goto out_trans_cancel; + + xfs_bumplink(tp, wip); + xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); + VFS_I(wip)->i_state &= ~I_LINKABLE; + } + + /* + * Set up the target. + */ + if (target_ip == NULL) { /* * If target does not exist and the rename crosses * directories, adjust the target directory link count @@ -3067,22 +3108,6 @@ xfs_rename( goto out_bmap_cancel; } } else { /* target_ip != NULL */ - /* - * If target exists and it's a directory, check that both - * target and source are directories and that target can be - * destroyed, or that neither is a directory. - */ - if (S_ISDIR(VFS_I(target_ip)->i_mode)) { - /* - * Make sure target dir is empty. - */ - if (!(xfs_dir_isempty(target_ip)) || - (VFS_I(target_ip)->i_nlink > 2)) { - error = -EEXIST; - goto out_trans_cancel; - } - } - /* * Link the source inode under the target name. * If the source inode is a directory and we are moving @@ -3175,32 +3200,6 @@ xfs_rename( if (error) goto out_bmap_cancel; - /* - * For whiteouts, we need to bump the link count on the whiteout inode. - * This means that failures all the way up to this point leave the inode - * on the unlinked list and so cleanup is a simple matter of dropping - * the remaining reference to it. If we fail here after bumping the link - * count, we're shutting down the filesystem so we'll never see the - * intermediate state on disk. - */ - if (wip) { - ASSERT(VFS_I(wip)->i_nlink == 0); - error = xfs_bumplink(tp, wip); - if (error) - goto out_bmap_cancel; - error = xfs_iunlink_remove(tp, wip); - if (error) - goto out_bmap_cancel; - xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); - - /* - * Now we have a real link, clear the "I'm a tmpfile" state - * flag from the inode so it doesn't accidentally get misused in - * future. - */ - VFS_I(wip)->i_state &= ~I_LINKABLE; - } - xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4e768e606998abe5e4f2587cdf5e834930c280b2..360e32220f93d631e6a317c329d1362f0307e1a4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -608,6 +608,7 @@ xfs_log_mount( xfs_daddr_t blk_offset, int num_bblks) { + bool fatal = xfs_sb_version_hascrc(&mp->m_sb); int error = 0; int min_logfsbs; @@ -659,9 +660,20 @@ xfs_log_mount( XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), XFS_MAX_LOG_BYTES); error = -EINVAL; + } else if (mp->m_sb.sb_logsunit > 1 && + mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) { + xfs_warn(mp, + "log stripe unit %u bytes must be a multiple of block size", + mp->m_sb.sb_logsunit); + error = -EINVAL; + fatal = true; } if (error) { - if (xfs_sb_version_hascrc(&mp->m_sb)) { + /* + * Log check errors are always fatal on v5; or whenever bad + * metadata leads to a crash. + */ + if (fatal) { xfs_crit(mp, "AAIEEE! Log failed size checks. 
Abort!"); ASSERT(0); goto out_free_log; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 37e603bf159137dfd38a1210b5d0bb31e5f4829f..db7f9fdd20a30d3761a316aac0496b8377e7d049 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1125,6 +1125,7 @@ xfs_reflink_remap_extent( uirec.br_startblock = irec->br_startblock + rlen; uirec.br_startoff = irec->br_startoff + rlen; uirec.br_blockcount = unmap_len - rlen; + uirec.br_state = irec->br_state; unmap_len = rlen; /* If this isn't a real mapping, we're done. */ diff --git a/gen_headers_arm.bp b/gen_headers_arm.bp index d56ace0fe65abce18a735bf0ce8200d9b316ac41..e7035cc44ea71465f57829d6b7cac31d00bb805b 100644 --- a/gen_headers_arm.bp +++ b/gen_headers_arm.bp @@ -927,6 +927,7 @@ gen_headers_out_arm = [ "linux/usb/g_printer.h", "linux/usb/gadgetfs.h", "linux/usb/midi.h", + "linux/usb/raw_gadget.h", "linux/usb/tmc.h", "linux/usb/usb_ctrl_qti.h", "linux/usb/video.h", diff --git a/gen_headers_arm64.bp b/gen_headers_arm64.bp index c2beed537f2fd535c2d0bac3c7b88b473d6b10c3..f40413af7833a69b6ba35b902929d855cd2e9424 100644 --- a/gen_headers_arm64.bp +++ b/gen_headers_arm64.bp @@ -922,6 +922,7 @@ gen_headers_out_arm64 = [ "linux/usb/g_printer.h", "linux/usb/gadgetfs.h", "linux/usb/midi.h", + "linux/usb/raw_gadget.h", "linux/usb/tmc.h", "linux/usb/usb_ctrl_qti.h", "linux/usb/video.h", diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4f077edb9b81b8086153609d6b4b22b4a22b96e0..71fadbe77e211bee751d292eb9a8e5e4862a3b20 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -556,11 +556,12 @@ typedef u64 acpi_integer; #define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * diff --git a/include/acpi/processor.h b/include/acpi/processor.h index d591bb77f592b21a56a0496d223ac0455b31b359..f4bff23135479fba812e9b9c8c821e7cacefbebe 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -291,6 +291,14 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx } #endif +static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg, + bool direct) +{ + if (direct || (is_percpu_thread() && cpu == smp_processor_id())) + return fn(arg); + return work_on_cpu(cpu, fn, arg); +} + /* in processor_perflib.c */ #ifdef CONFIG_CPU_FREQ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e60ea67cffa4a53900a36e9ae85f760e33f6fa8d..3972a2a9026817a65b2d549ae0982fe2e97093f0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -65,11 +65,11 @@ * .data. We don't want to pull in .data..other sections, which Linux * has defined. Same for text and bss. 
*/ -#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define TEXT_CFI_MAIN .text.cfi .text.[0-9a-zA-Z_]*.cfi -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..compoundliteral* .data..L* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* .bss..L* #else #define TEXT_MAIN .text #define TEXT_CFI_MAIN .text.cfi diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 5f821a9b3a1fa8110186c964a706805714a33966..49fdce1f5941950b8e86cede33894f25f186d7c4 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -343,6 +343,22 @@ struct ttm_bo_driver { uint32_t page_flags, struct page *dummy_read_page); + /** + * ttm_tt_create2 + * + * @bo: pointer to a struct ttm_buffer_object + * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags. + * @dummy_read_page: See struct ttm_bo_device. + * + * Create a struct ttm_tt to back data with system memory pages. + * No pages are actually allocated. + * Returns: + * NULL: Out of memory. + */ + struct ttm_tt *(*ttm_tt_create2)(struct ttm_buffer_object *bo, + uint32_t page_flags, + struct page *dummy_read_page); + /** * ttm_tt_populate * diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h index e0970a578188ccaf563340f115e2aaeadb65267e..a7207a965466322c20ab9dfe9e19698f6072599f 100644 --- a/include/keys/big_key-type.h +++ b/include/keys/big_key-type.h @@ -21,6 +21,6 @@ extern void big_key_free_preparse(struct key_preparsed_payload *prep); extern void big_key_revoke(struct key *key); extern void big_key_destroy(struct key *key); extern void big_key_describe(const struct key *big_key, struct seq_file *m); -extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen); +extern long big_key_read(const struct key *key, char *buffer, size_t buflen); #endif /* _KEYS_BIG_KEY_TYPE_H */ diff --git a/include/keys/user-type.h b/include/keys/user-type.h index 12babe9915944f84a040199451124763d15d4bd0..0d8f3cd3056f01ef74acdf9e63c80d16755607f3 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -45,8 +45,7 @@ extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); extern void user_describe(const struct key *user, struct seq_file *m); -extern long user_read(const struct key *key, - char __user *buffer, size_t buflen); +extern long user_read(const struct key *key, char *buffer, size_t buflen); static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b767c7ad65c6e95dd0bbd667b9e4c5524fddc34b..c51574fab0b009e728ec74a1e65aa5218b1bc1c2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,7 +4,8 @@ #include #include -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index afa37f807f12c1065a44ba35ffe6c88423756fd2..2e1077ea77db07cdc0d06eba4de6083b2810444b 100644 --- a/include/linux/buffer_head.h +++ 
b/include/linux/buffer_head.h @@ -187,6 +187,8 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); +void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, + gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); @@ -319,6 +321,12 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } +static inline void +sb_breadahead_unmovable(struct super_block *sb, sector_t block) +{ + __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0); +} + static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index de0f5fe284906fed5e87b4505ed57d6d5208ac15..a22949de5b4047a06b41c0459a982a61557a966a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -61,6 +61,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 13377c71584afec8e4f1f15753db894729204bf6..b1ddba09711d07fd805f5ef5f32630cef5414ceb 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -331,7 +331,7 @@ unsigned long read_word_at_a_time(const void *addr) * compiler has support to do so. */ #define compiletime_assert(condition, msg) \ - _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) #define compiletime_assert_atomic_type(t) \ compiletime_assert(__native_word(t), \ diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 4635f95000a4c50bff074ac5bb2f174985f7477a..79a6e37a1d6f641bcd56e15af8d2b009cb321f88 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -75,7 +75,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); #else /* !CONFIG_DEVFREQ_THERMAL */ -struct thermal_cooling_device * +static inline struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 087cbe77686802259b39a36edddfa22ef716cdaa..8089e28539f16e00aaaddbfe62c171f0bda0ae6a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -677,6 +677,7 @@ struct dma_filter { * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api + * @owner: owner module (automatically set based on the provided dev) * @src_addr_widths: bit mask of src addr widths the device supports * @dst_addr_widths: bit mask of dst addr widths the device supports * @directions: bit mask of slave direction the device supports since @@ -738,6 +739,7 @@ struct dma_device { int dev_id; struct device *dev; + struct module *owner; u32 src_addr_widths; u32 dst_addr_widths; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac3f4888b3dfac7999de92bef9e2e73fd77d3b8f..a5dbb57a687fbaa1be44b06db341016e138a4f65 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -38,9 +38,6 @@ #define 
F2FS_MAX_QUOTAS 3 #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_ENC_STRICT_MODE_FL (1 << 0) -#define f2fs_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL) #define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ @@ -125,6 +122,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_RESIZEFS_FLAG 0x00004000 #define CP_DISABLED_QUICK_FLAG 0x00002000 #define CP_DISABLED_FLAG 0x00001000 #define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 diff --git a/include/linux/fs.h b/include/linux/fs.h index 9948a8fdde3f3d56486171a90b35467e5bd2c707..236c4d59b9ae007a2c8f57878367198ee68d6bb2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -647,6 +647,7 @@ struct inode { struct rcu_head i_rcu; }; u64 i_version; + atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; @@ -1312,6 +1313,12 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_ACTIVE (1<<30) #define SB_NOUSER (1<<31) +/* These flags relate to encoding and casefolding */ +#define SB_ENC_STRICT_MODE_FL (1 << 0) + +#define sb_has_enc_strict_mode(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + /* * Umount options */ @@ -1380,6 +1387,10 @@ struct super_block { #endif struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; +#endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; @@ -3197,6 +3208,18 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); +#ifdef CONFIG_UNICODE +extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); +extern bool needs_casefold(const struct inode *dir); +#else +static inline bool needs_casefold(const struct inode *dir) +{ + return 0; +} +#endif + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 3b6b8ccebe7d2e33997338ea382f601946a26ab9..ecc604e61d61b9ffcc96946904f5608e837168b5 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -77,6 +77,10 @@ struct fsverity_operations { * * @inode: the inode * @index: 0-based index of the page within the Merkle tree + * @num_ra_pages: The number of Merkle tree pages that should be + * prefetched starting at @index if the page at @index + * isn't already cached. Implementations may ignore this + * argument; it's only a performance optimization. * * This can be called at any time on an open verity file, as well as * between ->begin_enable_verity() and ->end_enable_verity(). It may be @@ -87,7 +91,8 @@ struct fsverity_operations { * Return: the page on success, ERR_PTR() on failure */ struct page *(*read_merkle_tree_page)(struct inode *inode, - pgoff_t index); + pgoff_t index, + unsigned long num_ra_pages); /** * Write a Merkle tree block to the given inode. 
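
Before the futex.h changes below, one note on the @num_ra_pages hint documented above: a filesystem's ->read_merkle_tree_page() could honor it roughly as follows. This is a sketch only, loosely modeled on the read_file_data_page() helper added in fs/verity/enable.c earlier in this diff; demo_read_merkle_tree_page() and demo_start_readahead() are hypothetical names, and the sketch assumes the tree pages live in the inode's own page cache:

	static struct page *demo_read_merkle_tree_page(struct inode *inode,
						       pgoff_t index,
						       unsigned long num_ra_pages)
	{
		struct page *page;

		page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
		if (!page || !PageUptodate(page)) {
			if (page)
				put_page(page);	/* cached but not up to date */
			else if (num_ra_pages > 1)
				/* hypothetical helper that kicks off readahead */
				demo_start_readahead(inode->i_mapping, index,
						     num_ra_pages);
			page = read_mapping_page(inode->i_mapping, index, NULL);
		}
		return page;
	}

As the documentation says, the hint is purely a performance optimization: an implementation that ignores it and simply calls read_mapping_page() remains correct, it just issues more small I/O requests on sequential reads.
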
diff --git a/include/linux/futex.h b/include/linux/futex.h index a4b6cba699bfbd1dd9686741ff116d3c8702a1e7..6adb1ccac603854af0bbb6626e3f240244d23f38 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -34,23 +34,26 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, union futex_key { struct { + u64 i_seq; unsigned long pgoff; - struct inode *inode; - int offset; + unsigned int offset; } shared; struct { + union { + struct mm_struct *mm; + u64 __tmp; + }; unsigned long address; - struct mm_struct *mm; - int offset; + unsigned int offset; } private; struct { + u64 ptr; unsigned long word; - void *ptr; - int offset; + unsigned int offset; } both; }; -#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } #ifdef CONFIG_FUTEX enum { diff --git a/include/linux/hid.h b/include/linux/hid.h index 462005543529862d2e29c8af5af08d491485bff3..99c327842f12042adefd932045942f4ce0b72f74 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -478,7 +478,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 603e41ac5ed0abf747c72f33875548fce3c04897..05bb8aad8c51e2cbf08da1244914ccbac39e9193 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -621,6 +621,15 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_any_nullfunc(__le16 fc) +{ + return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)); +} + /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @fc: frame control field in little-endian byteorder diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1efb864ad148c710ed9cec7a82381..c91cf2dee12abd9fe4225c7000dcc2c25fbef0ed 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -2,15 +2,10 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include #include -struct net; -struct sock; struct inet_hashinfo; -struct nlattr; -struct nlmsghdr; -struct sk_buff; -struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, @@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); +static inline size_t inet_diag_msg_attrs_size(void) +{ + return nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ +#if IS_ENABLED(CONFIG_IPV6) + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ +#endif + + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4); /* INET_DIAG_CLASS_ID */ +} int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 99bc5b3ae26e1622f07558c52510c5e351e3cad6..733eaf95e207cd7c91223b01aaf9652b8cac0422 100644 --- a/include/linux/intel-svm.h +++ 
b/include/linux/intel-svm.h @@ -130,7 +130,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) BUG(); } -static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) { return -EINVAL; } diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index dba15ca8e60bc8b0f0d0faf3ca13c2afcd87c231..1dcd9198beb7faf30a820f65e811ba3eb6d0bea0 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -8,6 +8,7 @@ enum { ICQ_EXITED = 1 << 2, + ICQ_DESTROYED = 1 << 3, }; /* diff --git a/include/linux/key-type.h b/include/linux/key-type.h index dfb3ba782d2c7d68cf24b26f4cdb5b4db46d8b2e..535b310a4c3b18759e74d322d6f3757cab99e1d1 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -125,7 +125,7 @@ struct key_type { * much is copied into the buffer * - shouldn't do the copy if the buffer is NULL */ - long (*read)(const struct key *key, char __user *buffer, size_t buflen); + long (*read)(const struct key *key, char *buffer, size_t buflen); /* handle request_key() for this type instead of invoking * /sbin/request-key (optional) diff --git a/include/linux/key.h b/include/linux/key.h index 3571b7a980cc7a071c5d1dbb3e420e573aa90da3..afe4d6b90cad5f7cbb996bcecdd6b043a7cc3ead 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -346,7 +346,7 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, - key_perm_t perm); + key_perm_t perm); /* * The permissions required on a key that we're looking up. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 30376715a607056e979f0d958dc4536d8ec1345b..2e06ca46f07c6495f87d3884b517778521b0da19 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -945,7 +945,7 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn) start = slot + 1; } - if (gfn >= memslots[start].base_gfn && + if (start < slots->used_slots && gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { atomic_set(&slots->lru_slot, start); return &memslots[start]; diff --git a/include/linux/libata.h b/include/linux/libata.h index c5188dc389c843863cfd0f603a89b232fa548621..93838d98e3f38cd6a675e38eb4259a251664a4da 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1229,6 +1229,7 @@ struct pci_bits { }; extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); +extern void ata_pci_shutdown_one(struct pci_dev *pdev); extern void ata_pci_remove_one(struct pci_dev *pdev); #ifdef CONFIG_PM diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 3ef96743db8da3868744efc92d812c9b746bf9c7..1ecd35664e0d31e716e79a676ea856698dad6380 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -72,10 +72,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); h->first = n; if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } static inline void __hlist_nulls_del(struct hlist_nulls_node *n) @@ -85,13 +85,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_nulls_del(struct 
hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index e9c3b98df3e256268d48c6d829fd1d8bd5346dc1..a726f96010d59d9d99eda4e6d5d5ea9776ebdd18 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -279,8 +279,6 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * - * @module: The LLDD module using the interface - * * @localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -394,8 +392,6 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { - struct module *module; - /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 8712ff70995f42e4357eddb8bd9601bc42b246c0..980beb1d891d1ea56c30a61cc0717b0cd3913145 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -275,4 +275,35 @@ static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c) sizeof(*(p)->member) + __must_be_array((p)->member),\ sizeof(*(p))) +/** check_shl_overflow() - Calculate a left-shifted value and check overflow + * + * @a: Value to be shifted + * @s: How many bits left to shift + * @d: Pointer to where to store the result + * + * Computes *@d = (@a << @s) + * + * Returns true if '*d' cannot hold the result or when 'a << s' doesn't + * make sense. Example conditions: + * - 'a << s' causes bits to be lost when stored in *d. + * - 's' is garbage (e.g. negative) or so large that the result of + * 'a << s' is guaranteed to be 0. + * - 'a' is negative. + * - 'a << s' sets the sign bit, if any, in '*d'. + * + * '*d' will hold the results of the attempted shift, but is not + * considered "safe for use" if false is returned. + */ +#define check_shl_overflow(a, s, d) ({ \ + typeof(a) _a = a; \ + typeof(s) _s = s; \ + typeof(d) _d = d; \ + u64 _a_full = _a; \ + unsigned int _to_shift = \ + _s >= 0 && _s < 8 * sizeof(*d) ? 
_s : 0; \ + *_d = (_a_full << _to_shift); \ + (_to_shift != _s || *_d < 0 || _a < 0 || \ + (*_d >> _to_shift) != _a); \ +}) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5de46a658548d5fb3352a3a5fa2195229f799fca..a02794afc4dc3691cdbef0fd24461171f6a6ee7c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -265,7 +265,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) -PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) +PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) __SETPAGEFLAG(Referenced, referenced, PF_HEAD) diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index f7a04e1af11212df4b5c265e66788442462b1a27..abbc74621f380c554390466bfbb26874611b5708 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -63,6 +63,7 @@ struct pci_epc_ops { * @bitmap: bitmap to manage the PCI address space * @pages: number of bits representing the address region * @page_size: size of each page + * @lock: mutex to protect bitmap */ struct pci_epc_mem { phys_addr_t phys_base; @@ -70,6 +71,8 @@ struct pci_epc_mem { unsigned long *bitmap; size_t page_size; int pages; + /* mutex to protect against concurrent access for memory allocation*/ + struct mutex lock; }; /** diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 73a7bf30fe9a3fc299812cfbba6ea33a0fde39c5..3f3cece31148db1f2b6f1e52867d2bb51671e913 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -78,9 +78,9 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc) */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { - s64 ret = fbc->count; + /* Prevent reloads of fbc->count */ + s64 ret = READ_ONCE(fbc->count); - barrier(); /* Prevent reloads of fbc->count */ if (ret >= 0) return ret; return 0; diff --git a/include/linux/phy.h b/include/linux/phy.h index f6ec621991735a709af140cc901f25b6f361413c..f7c8e3476fa20d753d0de957cd5d960a5810ab97 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -376,6 +376,7 @@ struct phy_c45_device_ids { * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc. * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. 
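The check_shl_overflow() macro that closes just above is easiest to judge from the caller's side. A minimal sketch of the intended use under the semantics documented in the hunk; the helper name and the PAGE_SHIFT conversion are illustrative assumptions, not part of the patch:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/overflow.h>
#include <linux/types.h>

/* Convert a page count to bytes, refusing silent truncation. */
static int nr_pages_to_bytes(size_t nr_pages, u32 *out)
{
	/*
	 * check_shl_overflow() stores nr_pages << PAGE_SHIFT in *out and
	 * returns true if bits were lost on the way into the u32, if the
	 * shift count is garbage, or if a sign bit became involved.
	 */
	if (check_shl_overflow(nr_pages, PAGE_SHIFT, out))
		return -EOVERFLOW;

	return 0;
}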
* state: state of the PHY for management purposes @@ -415,6 +416,7 @@ struct phy_device { bool is_pseudo_fixed_link; bool has_fixups; bool suspended; + bool suspended_by_mdio_bus; bool sysfs_links; bool loopback_enabled; bool skip_sw_reset; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 891ef068d66f02bb256f075a43a24b7823a982c0..b8c434c34c8c7c97de9c4f2132236dca7f0db8e1 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -59,6 +59,7 @@ enum { POWER_SUPPLY_HEALTH_COLD, POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE, POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE, + POWER_SUPPLY_HEALTH_OVERCURRENT, POWER_SUPPLY_HEALTH_WARM, POWER_SUPPLY_HEALTH_COOL, POWER_SUPPLY_HEALTH_HOT, diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 2dd0a9ed5b361472fbb1794b3b77f96fc69780ce..733fad7dfbed96c0c55f3d6b2f44ca5cede61a86 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -97,6 +97,11 @@ struct qed_chain_u32 { u32 cons_idx; }; +struct addr_tbl_entry { + void *virt_addr; + dma_addr_t dma_map; +}; + struct qed_chain { /* fastpath portion of the chain - required for commands such * as produce / consume. @@ -107,10 +112,11 @@ struct qed_chain { /* Fastpath portions of the PBL [if exists] */ struct { - /* Table for keeping the virtual addresses of the chain pages, - * respectively to the physical addresses in the pbl table. + /* Table for keeping the virtual and physical addresses of the + * chain pages, respectively to the physical addresses + * in the pbl table. */ - void **pp_virt_addr_tbl; + struct addr_tbl_entry *pp_addr_tbl; union { struct qed_chain_pbl_u16 u16; @@ -287,7 +293,7 @@ qed_chain_advance_page(struct qed_chain *p_chain, *(u32 *)page_to_inc = 0; page_index = *(u32 *)page_to_inc; } - *p_next_elem = p_chain->pbl.pp_virt_addr_tbl[page_index]; + *p_next_elem = p_chain->pbl.pp_addr_tbl[page_index].virt_addr; } } @@ -537,7 +543,7 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->pbl_sp.p_phys_table = 0; p_chain->pbl_sp.p_virt_table = NULL; - p_chain->pbl.pp_virt_addr_tbl = NULL; + p_chain->pbl.pp_addr_tbl = NULL; } /** @@ -575,11 +581,11 @@ static inline void qed_chain_init_mem(struct qed_chain *p_chain, static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, void *p_virt_pbl, dma_addr_t p_phys_pbl, - void **pp_virt_addr_tbl) + struct addr_tbl_entry *pp_addr_tbl) { p_chain->pbl_sp.p_phys_table = p_phys_pbl; p_chain->pbl_sp.p_virt_table = p_virt_pbl; - p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; + p_chain->pbl.pp_addr_tbl = pp_addr_tbl; } /** @@ -644,7 +650,7 @@ static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain) break; case QED_CHAIN_MODE_PBL: last_page_idx = p_chain->page_cnt - 1; - p_virt_addr = p_chain->pbl.pp_virt_addr_tbl[last_page_idx]; + p_virt_addr = p_chain->pbl.pp_addr_tbl[last_page_idx].virt_addr; break; } /* p_virt_addr points at this stage to the last page of the chain */ @@ -716,7 +722,7 @@ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) page_cnt = qed_chain_get_page_cnt(p_chain); for (i = 0; i < page_cnt; i++) - memset(p_chain->pbl.pp_virt_addr_tbl[i], 0, + memset(p_chain->pbl.pp_addr_tbl[i].virt_addr, 0, QED_CHAIN_PAGE_SIZE); } diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index a10da545b3f670c91caced56b85200f788403dd5..cf64a9492256658b29f524c6f90015c652c4f6f6 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -34,7 +34,7 @@ static 
inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -66,7 +66,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -94,10 +94,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 0a6475eaa1c831a6bcc7e534bd8aeb133b485f71..0e460d26a8cd25f89e3b3d0b71a7358055ebfb43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1046,8 +1046,8 @@ struct task_struct { struct seccomp seccomp; /* Thread group tracking: */ - u32 parent_exec_id; - u32 self_exec_id; + u64 parent_exec_id; + u64 self_exec_id; /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ spinlock_t alloc_lock; diff --git a/include/linux/selection.h b/include/linux/selection.h index 5b278ce99d8d0666ac77bc913133fd3933c1546b..35937a61da0610fd545910497e0b838cdc592445 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -13,8 +13,8 @@ struct tty_struct; -extern struct vc_data *sel_cons; struct tty_struct; +struct vc_data; extern void clear_selection(void); extern int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty); @@ -23,6 +23,8 @@ extern int sel_loadlut(char __user *p); extern int mouse_reporting(void); extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); +bool vc_is_sel(struct vc_data *vc); + extern int console_blanked; extern const unsigned char color_table[]; diff --git a/include/linux/serdev.h b/include/linux/serdev.h index d609e6dc5bad00bb54f9258c077dc3220cbcb938..49f6e382c94e121f050a335f1c2abf26853419e0 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -164,9 +164,21 @@ int serdev_device_add(struct serdev_device *); void serdev_device_remove(struct serdev_device *); struct serdev_controller *serdev_controller_alloc(struct device *, size_t); -int serdev_controller_add(struct serdev_controller *); +int serdev_controller_add_platform(struct serdev_controller *, bool); void serdev_controller_remove(struct serdev_controller *); +/** + * serdev_controller_add() - Add a serdev controller + * @ctrl: controller to be registered. + * + * Register a controller previously allocated via serdev_controller_alloc() with + * the serdev core. 
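The list_nulls and rculist_nulls hunks above convert every plain store to ->pprev into WRITE_ONCE(). The point of the annotation only shows on the reader side: lockless readers inspect ->pprev with READ_ONCE(), and the marked store/load pairs keep the compiler from tearing or refetching the pointer while documenting the race for KCSAN. A sketch of such a reader; mainline later grew a helper along these lines, so treat the name as an assumption rather than part of this patch:

static inline int hlist_nulls_unhashed_lockless(const struct hlist_nulls_node *n)
{
	/* Pairs with the WRITE_ONCE() stores to ->pprev in the hunks above. */
	return !READ_ONCE(n->pprev);
}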
+ */ +static inline int serdev_controller_add(struct serdev_controller *ctrl) +{ + return serdev_controller_add_platform(ctrl, false); +} + static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl) { struct serdev_device *serdev = ctrl->serdev; diff --git a/include/linux/swab.h b/include/linux/swab.h index e466fd159c857d6d7acec6da574c8647067e063a..bcff5149861a9eb0dc2fa3b8da4bf8a80e5dc343 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -7,6 +7,7 @@ # define swab16 __swab16 # define swab32 __swab32 # define swab64 __swab64 +# define swab __swab # define swahw32 __swahw32 # define swahb32 __swahb32 # define swab16p __swab16p diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 1d3877c39a000a6e5a4fb7647a38de51c5c47c50..0b8c86096752778374006e9a403685911919ec65 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -377,7 +377,8 @@ static inline void num_poisoned_pages_inc(void) } #endif -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \ + defined(CONFIG_DEVICE_PRIVATE) static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; diff --git a/include/linux/tty.h b/include/linux/tty.h index 24c7182f115b19e9f5548a0f90d1cd603c4c9c54..3e9c74197a8ffbea1ca39e70ff980420484d208e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -224,6 +224,8 @@ struct tty_port_client_operations { void (*write_wakeup)(struct tty_port *port); }; +extern const struct tty_port_client_operations tty_port_default_client_ops; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d804962e65ed00e354ba6dd88e14f2bcd..74484d44c7554bdfea1f86a5cebe89809d1bd8e1 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3eee06cb4157d53915b50238cb3c82597609b717..b58cee96cd43126eebaf8167e890e8f9210738ca 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,12 +106,13 @@ extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, - unsigned long size); + unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) 
*/ diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 3fd07912909cddc74a30a5d3983d906563017e66..a3de234d3350540e9b46128b1e1452753d99451b 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -142,7 +142,7 @@ static inline bool vt_force_oops_output(struct vc_data *vc) return false; } -extern char vt_dont_switch; +extern bool vt_dont_switch; extern int default_utf8; extern int global_cursor_default; diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index d84d8c301546bb14724ad858b209cebde035cd8d..54f5caaa5cde52672422c6593027feef2d926a98 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -20,13 +20,18 @@ #define MAX_SUSPEND_ABORT_LEN 256 -void log_wakeup_reason(int irq); -int check_wakeup_reason(int irq); - #ifdef CONFIG_SUSPEND +void log_irq_wakeup_reason(int irq); +void log_threaded_irq_wakeup_reason(int irq, int parent_irq); void log_suspend_abort_reason(const char *fmt, ...); +void log_abnormal_wakeup_reason(const char *fmt, ...); +void clear_wakeup_reasons(void); #else +static inline void log_irq_wakeup_reason(int irq) { } +static inline void log_threaded_irq_wakeup_reason(int irq, int parent_irq) { } static inline void log_suspend_abort_reason(const char *fmt, ...) { } +static inline void log_abnormal_wakeup_reason(const char *fmt, ...) { } +static inline void clear_wakeup_reasons(void) { } #endif #endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index 8ffa94009d1a956d19894e84eee28ea75f3ae101..76002416cead9be57a9dbe5e0a21a18251e4fe22 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -268,7 +268,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ } while (0) @@ -280,7 +280,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ }) @@ -294,8 +294,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ } while (0) /* @@ -308,8 +308,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ }) /* diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 648caf90ec07a3c001415489cb25c8624d729900..b8fd023ba625a1ead99acf06508a7b9a79805fff 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -102,6 +102,7 @@ struct fib_rule_notifier_info { [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ + [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 
227dc0a841728e3298ca50b2d15b0569817934f3..ddf916e5e57d5936012d519750ce9d1cc8ccca5c 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /** @@ -282,4 +283,12 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index bee528135cf1c43e0a290a632d8e0705eaa6afd0..9f7f81117434b9b43ac0d998757a0f2673a703a4 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -207,6 +207,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && + !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } diff --git a/include/net/tcp.h b/include/net/tcp.h index 99be79730770c02ddedcc68bcfcb299ab3670ad0..2392c6544696ca25a420fd2aab9f66bb356b8425 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -55,7 +55,7 @@ extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; void tcp_time_wait(struct sock *sk, int state, int timeo); -#define MAX_TCP_HEADER (128 + MAX_HEADER) +#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 #define TCP_MIN_SND_MSS 48 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index f0a01a54bd153cbca39abc9cb8670c2597cdb231..df156f1d50b2d7e74297e2606ec5e4792f86d24e 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -638,7 +638,6 @@ struct iscsi_reject { #define ISCSI_REASON_BOOKMARK_INVALID 9 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10 #define ISCSI_REASON_NEGOTIATION_RESET 11 -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12 /* Max. 
number of Key=Value pairs in a text message */ #define MAX_KEY_VALUE_PAIRS 8192 diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 9955e0b85de5f1bae0ec7b349ac8107cff129d43..f54a533ecc8dd4655c8b9b5498edc8be37d3a154 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -93,9 +93,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index cf5f3fff1f1a7cb9930753c8abd88347ee66d048..fd7e4d1df9a15cbd136d9603610b16f7fc6f7afc 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -673,7 +673,7 @@ struct iscsi_session { atomic_t session_logout; atomic_t session_reinstatement; atomic_t session_stop_active; - atomic_t sleep_on_sess_wait_comp; + atomic_t session_close; /* connection list */ struct list_head sess_conn_list; struct list_head cr_active_list; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8b95c16b70454ac875ef5ee3ff8d9df16b78e48b..0978bdae2243b5d0b5f1e66ee89752f8daf866c3 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -40,7 +40,7 @@ enum afs_call_trace { EM(afs_call_trace_free, "FREE ") \ EM(afs_call_trace_put, "PUT ") \ EM(afs_call_trace_wake, "WAKE ") \ - E_(afs_call_trace_work, "WORK ") + E_(afs_call_trace_work, "QUEUE") /* * Export enum symbols via userspace. 
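The rawmidi hunk above trades three 1-bit bitfields for plain bools. The motivation is concurrency rather than style: bitfields share one storage unit, so writing one flag is a read-modify-write that can clobber a concurrent write to its neighbour. A compressed, standalone illustration of the hazard (the struct names are invented for the demo):

struct packed_flags {
	unsigned int opened:1, append:1;	/* one shared storage unit */
};

struct plain_flags {
	bool opened;				/* each flag owns its own byte */
	bool append;
};

/*
 * With packed_flags, "f->opened = 1" compiles to load word, set bit,
 * store word, so a concurrent "f->append = 1" on another CPU can be
 * silently undone. With plain_flags the two stores hit distinct bytes
 * and cannot clobber each other; locking is still needed for ordering.
 */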
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ed0d788495fcb3858fb584ca5a47bd219f560375..5b95f24125ed5131ff555b832e4bc69a2985243b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -163,7 +163,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); #define show_compress_algorithm(type) \ __print_symbolic(type, \ { COMPRESS_LZO, "LZO" }, \ - { COMPRESS_LZ4, "LZ4" }) + { COMPRESS_LZ4, "LZ4" }, \ + { COMPRESS_ZSTD, "ZSTD" }) struct f2fs_sb_info; struct f2fs_io_info; @@ -1821,6 +1822,77 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, TP_ARGS(inode, cluster_idx, compressed_size, ret) ); +TRACE_EVENT(f2fs_iostat, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), + + TP_ARGS(sbi, iostat), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, app_dio) + __field(unsigned long long, app_bio) + __field(unsigned long long, app_wio) + __field(unsigned long long, app_mio) + __field(unsigned long long, fs_dio) + __field(unsigned long long, fs_nio) + __field(unsigned long long, fs_mio) + __field(unsigned long long, fs_gc_dio) + __field(unsigned long long, fs_gc_nio) + __field(unsigned long long, fs_cp_dio) + __field(unsigned long long, fs_cp_nio) + __field(unsigned long long, fs_cp_mio) + __field(unsigned long long, app_drio) + __field(unsigned long long, app_brio) + __field(unsigned long long, app_rio) + __field(unsigned long long, app_mrio) + __field(unsigned long long, fs_drio) + __field(unsigned long long, fs_nrio) + __field(unsigned long long, fs_mrio) + __field(unsigned long long, fs_discard) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->app_dio = iostat[APP_DIRECT_IO]; + __entry->app_bio = iostat[APP_BUFFERED_IO]; + __entry->app_wio = iostat[APP_WRITE_IO]; + __entry->app_mio = iostat[APP_MAPPED_IO]; + __entry->fs_dio = iostat[FS_DATA_IO]; + __entry->fs_nio = iostat[FS_NODE_IO]; + __entry->fs_mio = iostat[FS_META_IO]; + __entry->fs_gc_dio = iostat[FS_GC_DATA_IO]; + __entry->fs_gc_nio = iostat[FS_GC_NODE_IO]; + __entry->fs_cp_dio = iostat[FS_CP_DATA_IO]; + __entry->fs_cp_nio = iostat[FS_CP_NODE_IO]; + __entry->fs_cp_mio = iostat[FS_CP_META_IO]; + __entry->app_drio = iostat[APP_DIRECT_READ_IO]; + __entry->app_brio = iostat[APP_BUFFERED_READ_IO]; + __entry->app_rio = iostat[APP_READ_IO]; + __entry->app_mrio = iostat[APP_MAPPED_READ_IO]; + __entry->fs_drio = iostat[FS_DATA_READ_IO]; + __entry->fs_nrio = iostat[FS_NODE_READ_IO]; + __entry->fs_mrio = iostat[FS_META_READ_IO]; + __entry->fs_discard = iostat[FS_DISCARD]; + ), + + TP_printk("dev = (%d,%d), " + "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], " + "gc [data=%llu, node=%llu], " + "cp [data=%llu, node=%llu, meta=%llu], " + "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu]", + show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, + __entry->app_bio, __entry->app_mio, __entry->fs_dio, + __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, + __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, + __entry->fs_cp_nio, __entry->fs_cp_mio, + __entry->app_rio, __entry->app_drio, __entry->app_brio, + __entry->app_mrio, __entry->fs_drio, __entry->fs_nrio, + __entry->fs_mrio) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/gpu_mem.h b/include/trace/events/gpu_mem.h new file mode 100644 index 
0000000000000000000000000000000000000000..1897822a9150659127631d4220702bd639beef5f --- /dev/null +++ b/include/trace/events/gpu_mem.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GPU memory trace points + * + * Copyright (C) 2020 Google, Inc. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu_mem + +#if !defined(_TRACE_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GPU_MEM_H + +#include + +/* + * The gpu_mem_total event indicates that there's an update to either the + * global or process total gpu memory counters. + * + * This event should be emitted whenever the kernel device driver allocates, + * frees, imports, unimports memory in the GPU addressable space. + * + * @gpu_id: This is the gpu id. + * + * @pid: Put 0 for global total, while positive pid for process total. + * + * @size: Virtual size of the allocation in bytes. + * + */ +TRACE_EVENT(gpu_mem_total, + + TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size), + + TP_ARGS(gpu_id, pid, size), + + TP_STRUCT__entry( + __field(uint32_t, gpu_id) + __field(uint32_t, pid) + __field(uint64_t, size) + ), + + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->pid = pid; + __entry->size = size; + ), + + TP_printk("gpu_id=%u pid=%u size=%llu", + __entry->gpu_id, + __entry->pid, + __entry->size) +); + +#endif /* _TRACE_GPU_MEM_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index f06a789f34cd9993f3952da30723adf995790605..00da2d183dcaeffd173d7074b566f1088a9452c3 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -46,6 +46,7 @@ extern "C" { #define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07 #define DRM_VIRTGPU_WAIT 0x08 #define DRM_VIRTGPU_GET_CAPS 0x09 +#define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a #define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 #define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 @@ -69,8 +70,11 @@ struct drm_virtgpu_execbuffer { __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */ }; + #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ #define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ +#define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ +#define VIRTGPU_PARAM_HOST_VISIBLE 4 struct drm_virtgpu_getparam { __u64 param; @@ -100,7 +104,13 @@ struct drm_virtgpu_resource_info { __u32 bo_handle; __u32 res_handle; __u32 size; - __u32 stride; + union { + __u32 stride; + __u32 strides[4]; /* strides[0] is accessible with stride. */ + }; + __u32 num_planes; + __u32 offsets[4]; + __u64 format_modifier; }; struct drm_virtgpu_3d_box { @@ -140,6 +150,31 @@ struct drm_virtgpu_get_caps { __u32 pad; }; +struct drm_virtgpu_resource_create_blob { +#define VIRTGPU_BLOB_MEM_GUEST 0x0001 +#define VIRTGPU_BLOB_MEM_HOST 0x0002 +#define VIRTGPU_BLOB_MEM_HOST_GUEST 0x0003 + +#define VIRTGPU_BLOB_FLAG_MAPPABLE 0x0001 +#define VIRTGPU_BLOB_FLAG_SHAREABLE 0x0002 +#define VIRTGPU_BLOB_FLAG_CROSS_DEVICE 0x0004 + /* zero is invalid blob_mem */ + __u32 blob_mem; + __u32 blob_flags; + __u32 bo_handle; + __u32 res_handle; + __u64 size; + + /* + * for 3D contexts with VIRTGPU_BLOB_MEM_HOST_GUEST and + * VIRTGPU_BLOB_MEM_HOST otherwise, must be zero. 
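Per the comment block in the new gpu_mem.h, a driver fires gpu_mem_total whenever it allocates, frees, imports, or unimports GPU memory, reporting pid 0 for the global counter and a positive pid for a per-process counter. A hedged sketch of the emitting side (the tracepoint is the one defined above; the helper and counters are invented for illustration, and a real driver would synchronize them):

#define CREATE_TRACE_POINTS
#include <trace/events/gpu_mem.h>

static u64 example_global_total;	/* bytes currently allocated, driver-wide */

static void example_gpu_account(u32 gpu_id, u32 pid, u64 proc_total, s64 delta)
{
	example_global_total += delta;

	/* pid == 0 carries the global total... */
	trace_gpu_mem_total(gpu_id, 0, example_global_total);
	/* ...and a positive pid carries that process' total. */
	trace_gpu_mem_total(gpu_id, pid, proc_total);
}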
+ */ + __u32 pad; + __u32 cmd_size; + __u64 cmd; + __u64 blob_id; +}; + #define DRM_IOCTL_VIRTGPU_MAP \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) @@ -175,6 +210,10 @@ struct drm_virtgpu_get_caps { DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \ struct drm_virtgpu_get_caps) +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB, \ + struct drm_virtgpu_resource_create_blob) + #if defined(__cplusplus) } #endif diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 7b8c9e19bad1c2bf72c21dcf6b358559d5e4b4ea..0f3cb13db8e93efe9d6319395f21849e896f7a02 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -65,7 +65,12 @@ /* keyctl structures */ struct keyctl_dh_params { - __s32 private; + union { +#ifndef __cplusplus + __s32 private; +#endif + __s32 priv; + }; __s32 prime; __s32 base; }; diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h index a0cac1d8670d8a89320adba86688aee6ef8a63b7..1937915be4137a3b122461495cd179f0c3105885 100644 --- a/include/uapi/linux/serio.h +++ b/include/uapi/linux/serio.h @@ -9,7 +9,7 @@ #ifndef _UAPI_SERIO_H #define _UAPI_SERIO_H - +#include #include #define SPIOCSTYPE _IOW('q', 0x01, unsigned long) @@ -18,10 +18,10 @@ /* * bit masks for use in "interrupt" flags (3rd argument) */ -#define SERIO_TIMEOUT BIT(0) -#define SERIO_PARITY BIT(1) -#define SERIO_FRAME BIT(2) -#define SERIO_OOB_DATA BIT(3) +#define SERIO_TIMEOUT _BITUL(0) +#define SERIO_PARITY _BITUL(1) +#define SERIO_FRAME _BITUL(2) +#define SERIO_OOB_DATA _BITUL(3) /* * Serio types diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 23cd84868cc3bd8907994664c82407ca49a1d8d9..7272f85d6d6ab5e24dd806e7f88760eaafcd7326 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -4,6 +4,7 @@ #include #include +#include #include /* @@ -132,6 +133,15 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) __fswab64(x)) #endif +static __always_inline unsigned long __swab(const unsigned long y) +{ +#if __BITS_PER_LONG == 64 + return __swab64(y); +#else /* __BITS_PER_LONG == 32 */ + return __swab32(y); +#endif +} + /** * __swahw32 - return a word-swapped 32-bit value * @x: value to wordswap diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h index 5f72af35b3ed768fd3bc1e2d8ca3f4f6ecaea47f..ad22079125bff2ba34f03b74e6cfd5e2eb460479 100644 --- a/include/uapi/linux/usb/charger.h +++ b/include/uapi/linux/usb/charger.h @@ -14,18 +14,18 @@ * ACA (Accessory Charger Adapters) */ enum usb_charger_type { - UNKNOWN_TYPE, - SDP_TYPE, - DCP_TYPE, - CDP_TYPE, - ACA_TYPE, + UNKNOWN_TYPE = 0, + SDP_TYPE = 1, + DCP_TYPE = 2, + CDP_TYPE = 3, + ACA_TYPE = 4, }; /* USB charger state */ enum usb_charger_state { - USB_CHARGER_DEFAULT, - USB_CHARGER_PRESENT, - USB_CHARGER_ABSENT, + USB_CHARGER_DEFAULT = 0, + USB_CHARGER_PRESENT = 1, + USB_CHARGER_ABSENT = 2, }; #endif /* _UAPI__LINUX_USB_CHARGER_H */ diff --git a/include/uapi/linux/usb/raw_gadget.h b/include/uapi/linux/usb/raw_gadget.h new file mode 100644 index 0000000000000000000000000000000000000000..ea375082b3ac7ab9fa6ea6919ce35cb0ff9a6588 --- /dev/null +++ b/include/uapi/linux/usb/raw_gadget.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * USB Raw Gadget driver. + * + * See Documentation/usb/raw-gadget.rst for more details. 
+ */ + +#ifndef _UAPI__LINUX_USB_RAW_GADGET_H +#define _UAPI__LINUX_USB_RAW_GADGET_H + +#include +#include +#include + +/* Maximum length of driver_name/device_name in the usb_raw_init struct. */ +#define UDC_NAME_LENGTH_MAX 128 + +/* + * struct usb_raw_init - argument for USB_RAW_IOCTL_INIT ioctl. + * @speed: The speed of the emulated USB device, takes the same values as + * the usb_device_speed enum: USB_SPEED_FULL, USB_SPEED_HIGH, etc. + * @driver_name: The name of the UDC driver. + * @device_name: The name of a UDC instance. + * + * The last two fields identify a UDC the gadget driver should bind to. + * For example, Dummy UDC has "dummy_udc" as its driver_name and "dummy_udc.N" + * as its device_name, where N is the index of the Dummy UDC instance. + * At the same time, the dwc2 driver used on Raspberry Pi Zero has + * "20980000.usb" as both driver_name and device_name. + */ +struct usb_raw_init { + __u8 driver_name[UDC_NAME_LENGTH_MAX]; + __u8 device_name[UDC_NAME_LENGTH_MAX]; + __u8 speed; +}; + +/* The type of event fetched with the USB_RAW_IOCTL_EVENT_FETCH ioctl. */ +enum usb_raw_event_type { + USB_RAW_EVENT_INVALID = 0, + + /* This event is queued when the driver has bound to a UDC. */ + USB_RAW_EVENT_CONNECT = 1, + + /* This event is queued when a new control request arrives on ep0. */ + USB_RAW_EVENT_CONTROL = 2, + + /* The list might grow in the future. */ +}; + +/* + * struct usb_raw_event - argument for USB_RAW_IOCTL_EVENT_FETCH ioctl. + * @type: The type of the fetched event. + * @length: Length of the data buffer. Updated by the driver and set to the + * actual length of the fetched event data. + * @data: A buffer to store the fetched event data. + * + * Currently the fetched data buffer is empty for USB_RAW_EVENT_CONNECT, + * and contains struct usb_ctrlrequest for USB_RAW_EVENT_CONTROL. + */ +struct usb_raw_event { + __u32 type; + __u32 length; + __u8 data[0]; +}; + +#define USB_RAW_IO_FLAGS_ZERO 0x0001 +#define USB_RAW_IO_FLAGS_MASK 0x0001 + +static inline int usb_raw_io_flags_valid(__u16 flags) +{ + return (flags & ~USB_RAW_IO_FLAGS_MASK) == 0; +} + +static inline int usb_raw_io_flags_zero(__u16 flags) +{ + return (flags & USB_RAW_IO_FLAGS_ZERO); +} + +/* + * struct usb_raw_ep_io - argument for USB_RAW_IOCTL_EP0/EP_WRITE/READ ioctls. + * @ep: Endpoint handle as returned by USB_RAW_IOCTL_EP_ENABLE for + * USB_RAW_IOCTL_EP_WRITE/READ. Ignored for USB_RAW_IOCTL_EP0_WRITE/READ. + * @flags: When USB_RAW_IO_FLAGS_ZERO is specified, the zero flag is set on + * the submitted USB request, see include/linux/usb/gadget.h for details. + * @length: Length of data. + * @data: Data to send for USB_RAW_IOCTL_EP0/EP_WRITE. Buffer to store received + * data for USB_RAW_IOCTL_EP0/EP_READ. + */ +struct usb_raw_ep_io { + __u16 ep; + __u16 flags; + __u32 length; + __u8 data[0]; +}; + +/* + * Initializes a Raw Gadget instance. + * Accepts a pointer to the usb_raw_init struct as an argument. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_INIT _IOW('U', 0, struct usb_raw_init) + +/* + * Instructs Raw Gadget to bind to a UDC and start emulating a USB device. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_RUN _IO('U', 1) + +/* + * A blocking ioctl that waits for an event and returns fetched event data to + * the user. + * Accepts a pointer to the usb_raw_event struct. + * Returns 0 on success or negative error code on failure. 
+ */ +#define USB_RAW_IOCTL_EVENT_FETCH _IOR('U', 2, struct usb_raw_event) + +/* + * Queues an IN (OUT for READ) urb as a response to the last control request + * received on endpoint 0, provided that was an IN (OUT for READ) request and + * waits until the urb is completed. Copies received data to user for READ. + * Accepts a pointer to the usb_raw_ep_io struct as an argument. + * Returns length of transferred data on success or negative error code on + * failure. + */ +#define USB_RAW_IOCTL_EP0_WRITE _IOW('U', 3, struct usb_raw_ep_io) +#define USB_RAW_IOCTL_EP0_READ _IOWR('U', 4, struct usb_raw_ep_io) + +/* + * Finds an endpoint that supports the transfer type specified in the + * descriptor and enables it. + * Accepts a pointer to the usb_endpoint_descriptor struct as an argument. + * Returns enabled endpoint handle on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_EP_ENABLE _IOW('U', 5, struct usb_endpoint_descriptor) + +/* Disables specified endpoint. + * Accepts endpoint handle as an argument. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_EP_DISABLE _IOW('U', 6, __u32) + +/* + * Queues an IN (OUT for READ) urb as a response to the last control request + * received on endpoint usb_raw_ep_io.ep, provided that was an IN (OUT for READ) + * request and waits until the urb is completed. Copies received data to user + * for READ. + * Accepts a pointer to the usb_raw_ep_io struct as an argument. + * Returns length of transferred data on success or negative error code on + * failure. + */ +#define USB_RAW_IOCTL_EP_WRITE _IOW('U', 7, struct usb_raw_ep_io) +#define USB_RAW_IOCTL_EP_READ _IOWR('U', 8, struct usb_raw_ep_io) + +/* + * Switches the gadget into the configured state. + * Returns 0 on success or negative error code on failure. + */ +#define USB_RAW_IOCTL_CONFIGURE _IO('U', 9) + +/* + * Constrains UDC VBUS power usage. + * Accepts current limit in 2 mA units as an argument. + * Returns 0 on success or negative error code on failure. 
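Taken together, the ioctls above give user space a small event loop: INIT and RUN once, then FETCH events and answer each control request on ep0. A minimal user-space sketch under those definitions (the /dev/raw-gadget node and the Dummy UDC names follow the header comments; all error handling is omitted):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/usb/ch9.h>
#include <linux/usb/raw_gadget.h>

static void raw_gadget_loop(void)
{
	char buf[4096];
	struct usb_raw_event *ev = (struct usb_raw_event *)buf;
	struct usb_raw_init init;
	int fd = open("/dev/raw-gadget", O_RDWR);

	memset(&init, 0, sizeof(init));
	strcpy((char *)init.driver_name, "dummy_udc");
	strcpy((char *)init.device_name, "dummy_udc.0");
	init.speed = USB_SPEED_HIGH;

	ioctl(fd, USB_RAW_IOCTL_INIT, &init);
	ioctl(fd, USB_RAW_IOCTL_RUN, 0);

	for (;;) {
		ev->type = 0;
		ev->length = sizeof(buf) - sizeof(*ev);
		ioctl(fd, USB_RAW_IOCTL_EVENT_FETCH, ev);
		if (ev->type == USB_RAW_EVENT_CONTROL) {
			/* ev->data now holds a struct usb_ctrlrequest;
			 * reply with USB_RAW_IOCTL_EP0_WRITE or EP0_READ. */
		}
	}
}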
+ */ +#define USB_RAW_IOCTL_VBUS_DRAW _IOW('U', 10, __u32) + +#endif /* _UAPI__LINUX_USB_RAW_GADGET_H */ diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 3d6d63f9237c190158f3863bc0e7e831e426ea89..f44af31f7990dd1d45347354f777e47e4ffe6596 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -40,8 +40,33 @@ #include -#define VIRTIO_GPU_F_VIRGL 0 -#define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_CTX_* + * VIRTIO_GPU_CMD_*_3D + */ +#define VIRTIO_GPU_F_VIRGL 0 + +/* + * VIRTIO_GPU_CMD_GET_EDID + */ +#define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 +/* + * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB + */ +#define VIRTIO_GPU_F_RESOURCE_BLOB 3 +/* + * VIRTIO_GPU_CMD_RESOURCE_MAP + * VIRTIO_GPU_CMD_RESOURCE_UNMAP + */ +#define VIRTIO_GPU_F_HOST_VISIBLE 4 +/* + * VIRTIO_GPU_CMD_CTX_CREATE_V2 + */ +#define VIRTIO_GPU_F_VULKAN 5 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -58,6 +83,10 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, + VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB, + VIRTIO_GPU_CMD_RESOURCE_MAP, + VIRTIO_GPU_CMD_RESOURCE_UNMAP, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -79,6 +108,13 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, + VIRTIO_GPU_RESP_OK_MAP_INFO, + + /* CHROMIUM: legacy responses */ + VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO_LEGACY = 0x1104, + /* CHROMIUM: success responses */ + VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO = 0x11FF, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -87,6 +123,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + VIRTIO_GPU_RESP_ERR_INVALID_MEMORY_ID, }; #define VIRTIO_GPU_FLAG_FENCE (1 << 0) @@ -138,6 +175,7 @@ struct virtio_gpu_resource_unref { struct virtio_gpu_resource_create_2d { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; + /* memory_type is VIRTIO_GPU_MEMORY_TRANSFER */ __le32 format; __le32 width; __le32 height; @@ -179,6 +217,7 @@ struct virtio_gpu_resource_attach_backing { struct virtio_gpu_ctrl_hdr hdr; __le32 resource_id; __le32 nr_entries; + /* struct virtio_gpu_mem_entry entries follow here */ }; /* VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING */ @@ -263,6 +302,7 @@ struct virtio_gpu_cmd_submit { }; #define VIRTIO_GPU_CAPSET_VIRGL 1 +#define VIRTIO_GPU_CAPSET_VIRGL2 2 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { @@ -308,6 +348,15 @@ struct virtio_gpu_resp_edid { __u8 edid[1024]; }; +/* VIRTIO_GPU_RESP_OK_RESOURCE_PLANE_INFO */ +struct virtio_gpu_resp_resource_plane_info { + struct virtio_gpu_ctrl_hdr hdr; + __le32 num_planes; + __le64 format_modifier; + __le32 strides[4]; + __le32 offsets[4]; +}; + #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) struct virtio_gpu_config { @@ -331,4 +380,68 @@ enum virtio_gpu_formats { VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, }; +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __u8 uuid[16]; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB */ +struct virtio_gpu_resource_create_blob { + struct 
virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; +#define VIRTIO_GPU_BLOB_MEM_GUEST 0x0001 +#define VIRTIO_GPU_BLOB_MEM_HOST3D 0x0002 +#define VIRTIO_GPU_BLOB_MEM_HOST3D_GUEST 0x0003 +#define VIRTIO_GPU_BLOB_MEM_HOSTSYS 0x0004 +#define VIRTIO_GPU_BLOB_MEM_HOSTSYS_GUEST 0x0005 + +#define VIRTIO_GPU_BLOB_FLAG_USE_MAPPABLE 0x0001 +#define VIRTIO_GPU_BLOB_FLAG_USE_SHAREABLE 0x0002 +#define VIRTIO_GPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004 + /* zero is invalid blob mem */ + __le32 blob_mem; + __le32 blob_flags; + __le64 blob_id; + __le64 size; + __le32 nr_entries; + /* + * nr_entries * sizeof(struct virtio_gpu_mem_entry) bytes follow + */ +}; + +/* VIRTIO_GPU_CMD_RESOURCE_MAP */ +struct virtio_gpu_resource_map { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; + __le64 offset; +}; + +/* VIRTIO_GPU_RESP_OK_MAP_INFO */ +#define VIRTIO_GPU_MAP_CACHE_MASK 0x0f +#define VIRTIO_GPU_MAP_CACHE_NONE 0x00 +#define VIRTIO_GPU_MAP_CACHE_CACHED 0x01 +#define VIRTIO_GPU_MAP_CACHE_UNCACHED 0x02 +#define VIRTIO_GPU_MAP_CACHE_WC 0x03 +struct virtio_gpu_resp_map_info { + struct virtio_gpu_ctrl_hdr hdr; + __u32 map_flags; + __u32 padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_UNMAP */ +struct virtio_gpu_resource_unmap { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + #endif diff --git a/init/Makefile b/init/Makefile index 0320e1a0705d20778f016cee1c425b5da278868b..e5dd3192ed356ef2e8579c0a4c65035cc6fb90c1 100644 --- a/init/Makefile +++ b/init/Makefile @@ -33,5 +33,6 @@ $(obj)/version.o: include/generated/compile.h silent_chk_compile.h = : include/generated/compile.h: FORCE @$($(quiet)chk_compile.h) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" \ + "$(CC) $(KBUILD_CFLAGS)" "$(LD)" diff --git a/ipc/sem.c b/ipc/sem.c index d6dd2dc9ddad3c579ddab5d5b8e2730465fc9238..6adc245f3e02cd17fb9c0502e15e0a2a33e2557e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2248,11 +2248,9 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - /* we are the last process using this ulp, acquiring ulp->lock * isn't required. 
Besides that, we are also protected against - * IPC_RMID as we hold sma->sem_perm lock now - */ + spin_lock(&ulp->lock); list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { diff --git a/ipc/util.c b/ipc/util.c index 79b30eee32cd857c5dae1a71d1cdf37b34b07d0a..7989f5e532198b19d65d895b5d0326d4bfe0bd36 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -750,13 +750,13 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, total++; } + *new_pos = pos + 1; if (total >= ids->in_use) return NULL; for (; pos < IPCMNI; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { - *new_pos = pos + 1; rcu_read_lock(); ipc_lock_object(ipc); return ipc; diff --git a/kernel/audit.c b/kernel/audit.c index d301276bca584170e2d01de37f5372cbf5f4de38..aa6d5e39526b1840bf71869db6b9bcf5708f0db9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1067,13 +1067,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature audit_log_end(ab); } -static int audit_set_feature(struct sk_buff *skb) +static int audit_set_feature(struct audit_features *uaf) { - struct audit_features *uaf; int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); - uaf = nlmsg_data(nlmsg_hdr(skb)); /* if there is ever a version 2 we should handle that here */ @@ -1141,6 +1139,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; void *data; + int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; @@ -1154,6 +1153,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); + data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { @@ -1177,7 +1177,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) @@ -1281,7 +1281,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; break; case AUDIT_SET_FEATURE: - err = audit_set_feature(skb); + if (data_len < sizeof(struct audit_features)) + return -EINVAL; + err = audit_set_feature(data); if (err) return err; break; @@ -1290,9 +1292,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_FIRST_USER_MSG2 ... 
AUDIT_LAST_USER_MSG2: if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; + /* exit early if there isn't at least one character to print */ + if (data_len < 2) + return -EINVAL; err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ + char *str = data; + err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); @@ -1300,26 +1307,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } audit_log_common_recv_msg(&ab, msg_type); - if (msg_type != AUDIT_USER_TTY) + if (msg_type != AUDIT_USER_TTY) { + /* ensure NULL termination */ + str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, - (char *)data); - else { - int size; - + str); + } else { audit_log_format(ab, " data="); - size = nlmsg_len(nlh); - if (size > 0 && - ((unsigned char *)data)[size - 1] == '\0') - size--; - audit_log_n_untrustedstring(ab, data, size); + if (data_len > 0 && str[data_len - 1] == '\0') + data_len--; + audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: - if (nlmsg_len(nlh) < sizeof(struct audit_rule_data)) + if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE); @@ -1327,7 +1332,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_end(ab); return -EPERM; } - err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh)); + err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); @@ -1341,7 +1346,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; - size_t msglen = nlmsg_len(nlh); + size_t msglen = data_len; char *old, *new; err = -EINVAL; @@ -1417,7 +1422,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 215c6e1ee026fa2006ceb090b243f393867a2b25..16cf396ea738a2c29769b18997f30fdb0d59a9a9 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -435,6 +435,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, bufp = data->buf; for (i = 0; i < data->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; + u32 f_val; err = -EINVAL; @@ -443,12 +444,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; f->type = data->fields[i]; - f->val = data->values[i]; + f_val = data->values[i]; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { + if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; - f->val = 0; + f_val = 0; entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } @@ -464,7 +465,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SUID: case AUDIT_FSUID: case AUDIT_OBJ_UID: - f->uid = make_kuid(current_user_ns(), f->val); + f->uid = make_kuid(current_user_ns(), f_val); if (!uid_valid(f->uid)) goto exit_free; break; @@ -473,12 +474,13 @@ static struct 
audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SGID: case AUDIT_FSGID: case AUDIT_OBJ_GID: - f->gid = make_kgid(current_user_ns(), f->val); + f->gid = make_kgid(current_user_ns(), f_val); if (!gid_valid(f->gid)) goto exit_free; break; case AUDIT_SESSIONID: case AUDIT_ARCH: + f->val = f_val; entry->rule.arch_f = f; break; case AUDIT_SUBJ_USER: @@ -491,11 +493,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } + entry->rule.buflen += f_val; + f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they @@ -504,68 +508,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, pr_warn("audit rule for LSM \'%s\' is invalid\n", str); err = 0; - } - if (err) { - kfree(str); + } else if (err) goto exit_free; - } else - f->lsm_str = str; break; case AUDIT_WATCH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - - err = audit_to_watch(&entry->rule, str, f->val, f->op); + } + err = audit_to_watch(&entry->rule, str, f_val, f->op); if (err) { kfree(str); goto exit_free; } + entry->rule.buflen += f_val; break; case AUDIT_DIR: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } err = audit_make_tree(&entry->rule, str, f->op); kfree(str); if (err) goto exit_free; + entry->rule.buflen += f_val; break; case AUDIT_INODE: + f->val = f_val; err = audit_to_inode(&entry->rule, f); if (err) goto exit_free; break; case AUDIT_FILTERKEY: - if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) + if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; + } + entry->rule.buflen += f_val; entry->rule.filterkey = str; break; case AUDIT_EXE: - if (entry->rule.exe || f->val > PATH_MAX) + if (entry->rule.exe || f_val > PATH_MAX) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); + str = audit_unpack_string(&bufp, &remain, f_val); if (IS_ERR(str)) { err = PTR_ERR(str); goto exit_free; } - entry->rule.buflen += f->val; - - audit_mark = audit_alloc_mark(&entry->rule, str, f->val); + audit_mark = audit_alloc_mark(&entry->rule, str, f_val); if (IS_ERR(audit_mark)) { kfree(str); err = PTR_ERR(audit_mark); goto exit_free; } + entry->rule.buflen += f_val; entry->rule.exe = audit_mark; break; + default: + f->val = f_val; + break; } } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 89d58554eb9910b1a6c251875efcf8a645ac44ae..d346cc46c9d52ed66ff3ddc62e3aca81f618ce30 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1473,7 +1473,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, union bpf_attr __user *uattr) { struct bpf_prog_info __user *uinfo = 
u64_to_user_ptr(attr->info.info); - struct bpf_prog_info info = {}; + struct bpf_prog_info info; u32 info_len = attr->info.info_len; char __user *uinsns; u32 ulen; @@ -1484,6 +1484,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return err; info_len = min_t(u32, sizeof(info), info_len); + memset(&info, 0, sizeof(info)); if (copy_from_user(&info, uinfo, info_len)) return -EFAULT; @@ -1529,7 +1530,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, union bpf_attr __user *uattr) { struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); - struct bpf_map_info info = {}; + struct bpf_map_info info; u32 info_len = attr->info.info_len; int err; @@ -1538,6 +1539,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, return err; info_len = min_t(u32, sizeof(info), info_len); + memset(&info, 0, sizeof(info)); info.type = map->map_type; info.id = map->id; info.key_size = map->key_size; @@ -1583,7 +1585,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { - union bpf_attr attr = {}; + union bpf_attr attr; int err; if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) @@ -1595,6 +1597,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz size = min_t(u32, size, sizeof(attr)); /* copy attributes from user space, may be less than sizeof(bpf_attr) */ + memset(&attr, 0, sizeof(attr)); if (copy_from_user(&attr, uattr, size) != 0) return -EFAULT; diff --git a/kernel/cfi.c b/kernel/cfi.c index 967b0755c00e2155764f9997ecee5bfdfd391962..b23f6ede42ca6e2bb9488cd1b3adeec2eb420586 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -12,7 +12,6 @@ #include #include #include -#include #include /* Compiler-defined handler names */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 292b60183e57e00e23d85493a09d316a4c8dd283..1d166be184f7b3fd4754925c46f89f9fe3a15558 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -501,6 +501,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) */ p++; if (p >= end) { + (*pos)++; return NULL; } else { *pos = *p; @@ -824,7 +825,7 @@ void cgroup1_release_agent(struct work_struct *work) pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!pathbuf || !agentbuf) + if (!pathbuf || !agentbuf || !strlen(agentbuf)) goto out; spin_lock_irq(&css_set_lock); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ea287f53c3a3da70843df836b95ff22083faffb0..a86b94e29d6bddd5afed002f8a3f9f7899bc6663 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4141,12 +4141,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4204,10 +4208,14 @@ static void css_task_iter_advance(struct css_task_iter *it) else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { 
it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4226,11 +4234,12 @@ static void css_task_iter_advance(struct css_task_iter *it) goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } @@ -4339,6 +4348,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4354,7 +4366,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos. */ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4362,10 +4374,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } @@ -5925,6 +5938,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) return; } + /* Don't associate the sock with unrelated interrupted task's cgroup. */ + if (in_interrupt()) + return; + rcu_read_lock(); while (true) { diff --git a/kernel/cpu.c b/kernel/cpu.c index aaebab62f2f2488e882cf6e2d3a36d57697cb591..c185e19820d608939e1151d7c531722b1d38dabe 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -498,8 +498,7 @@ static int bringup_wait_for_ap(unsigned int cpu) if (WARN_ON_ONCE((!cpu_online(cpu)))) return -ECANCELED; - /* Unpark the stopper thread and the hotplug thread of the target cpu */ - stop_machine_unpark(cpu); + /* Unpark the hotplug thread of the target cpu */ kthread_unpark(st->thread); /* @@ -1089,8 +1088,8 @@ void notify_cpu_starting(unsigned int cpu) /* * Called from the idle task. Wake up the controlling task which brings the - * stopper and the hotplug thread of the upcoming CPU up and then delegates - * the rest of the online bringup to the hotplug thread. + * hotplug thread of the upcoming CPU up and then delegates the rest of the + * online bringup to the hotplug thread. */ void cpuhp_online_idle(enum cpuhp_state state) { @@ -1100,6 +1099,12 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + /* + * Unpark the stopper thread before we start the idle loop (and start + * scheduling); this ensures the stopper task is always available. 
+ */ + stop_machine_unpark(smp_processor_id()); + st->state = CPUHP_AP_ONLINE_IDLE; complete_ap_thread(st, true); } @@ -2174,10 +2179,8 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) { + if (!ret) cpu_smt_control = ctrlval; - arch_smt_update(); - } cpu_maps_update_done(); return ret; } @@ -2188,7 +2191,6 @@ int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; - arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) diff --git a/kernel/events/core.c b/kernel/events/core.c index 162d824f7bfa37fb75e264447229fd97d688c853..0033bc900c4bcf0e3e41ab1b2fcf770b4c0579e2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6331,9 +6331,12 @@ static u64 perf_virt_to_phys(u64 virt) * Try IRQ-safe __get_user_pages_fast first. * If failed, leave phys_addr as 0. */ - if ((current->mm != NULL) && - (__get_user_pages_fast(virt, 1, 0, &p) == 1)) - phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + if (current->mm != NULL) { + pagefault_disable(); + if (__get_user_pages_fast(virt, 1, 0, &p) == 1) + phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + pagefault_enable(); + } if (p) put_page(p); @@ -6820,10 +6823,17 @@ static void perf_event_task_output(struct perf_event *event, goto out; task_event->event_id.pid = perf_event_pid(event, task); - task_event->event_id.ppid = perf_event_pid(event, current); - task_event->event_id.tid = perf_event_tid(event, task); - task_event->event_id.ptid = perf_event_tid(event, current); + + if (task_event->event_id.header.type == PERF_RECORD_EXIT) { + task_event->event_id.ppid = perf_event_pid(event, + task->real_parent); + task_event->event_id.ptid = perf_event_pid(event, + task->real_parent); + } else { /* PERF_RECORD_FORK */ + task_event->event_id.ppid = perf_event_pid(event, current); + task_event->event_id.ptid = perf_event_tid(event, current); + } task_event->event_id.time = perf_event_clock(event); diff --git a/kernel/futex.c b/kernel/futex.c index f5aae14c247b65b394ff6964dc8f3b350ad5e054..2921ebaa14676f3021e662897631146403600508 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -401,9 +401,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb) */ static struct futex_hash_bucket *hash_futex(union futex_key *key) { - u32 hash = jhash2((u32*)&key->both.word, - (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); + return &futex_queues[hash & (futex_hashsize - 1)]; } @@ -445,7 +445,7 @@ static void get_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - ihold(key->shared.inode); /* implies smp_mb(); (B) */ + smp_mb(); /* explicit smp_mb(); (B) */ break; case FUT_OFF_MMSHARED: futex_get_mm(key); /* implies smp_mb(); (B) */ @@ -479,7 +479,6 @@ static void drop_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - iput(key->shared.inode); break; case FUT_OFF_MMSHARED: mmdrop(key->private.mm); @@ -487,6 +486,46 @@ static void drop_futex_key_refs(union futex_key *key) } } +/* + * Generate a machine-wide unique identifier for this inode. + * + * This relies on u64 not wrapping in the lifetime of the machine, which with + * 1ns resolution means almost 585 years.
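+ * (Sanity check of that figure: 2^64 ns is roughly 1.8e19 ns and one
+ * year is roughly 3.16e16 ns, so even incrementing once per nanosecond
+ * the counter wraps only after about 1.8e19 / 3.16e16 ~= 584.5 years.)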
+ * + * This further relies on the fact that a well-formed program will not unmap + * the file while it has a (shared) futex waiting on it. This mapping will have + * a file reference which pins the mount and inode. + * + * If for some reason an inode gets evicted and read back in again, it will get + * a new sequence number and will _NOT_ match, even though it is the exact same + * file. + * + * It is important that match_futex() will never have a false positive, especially + * for PI futexes, where that can mess up the state. The above argues that + * false negatives are only possible for malformed programs. + */ +static u64 get_inode_sequence_number(struct inode *inode) +{ + static atomic64_t i_seq; + u64 old; + + /* Does the inode already have a sequence number? */ + old = atomic64_read(&inode->i_sequence); + if (likely(old)) + return old; + + for (;;) { + u64 new = atomic64_add_return(1, &i_seq); + if (WARN_ON_ONCE(!new)) + continue; + + old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); + if (old) + return old; + return new; + } +} + /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex @@ -499,9 +538,15 @@ static void drop_futex_key_refs(union futex_key *key) * * The key words are stored in @key on success. * - * For shared mappings, it's (page->index, file_inode(vma->vm_file), - * offset_within_page). For private mappings, it's (uaddr, current->mm). - * We can usually work out the index without swapping in the page. + * For shared mappings (when @fshared), the key is: + * ( inode->i_sequence, page->index, offset_within_page ) + * [ also see get_inode_sequence_number() ] + * + * For private mappings (or when !@fshared), the key is: + * ( current->mm, address, 0 ) + * + * This allows (cross-process, where applicable) identification of the futex + * without keeping the page pinned for the duration of the FUTEX_WAIT. * * lock_page() might sleep, the caller should not hold a spinlock. */ @@ -641,8 +686,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); /* implies smp_mb(); (B) */ - } else { struct inode *inode; @@ -674,40 +717,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) goto again; } - /* - * Take a reference unless it is about to be freed. Previously - * this reference was taken by ihold under the page lock - * pinning the inode in place so i_lock was unnecessary. The - * only way for this check to fail is if the inode was - * truncated in parallel which is almost certainly an - * application bug. In such a case, just retry. - * - * We are not calling into get_futex_key_refs() in file-backed - * cases, therefore a successful atomic_inc return below will - * guarantee that get_futex_key() will still imply smp_mb(); (B).
- */ - if (!atomic_inc_not_zero(&inode->i_count)) { - rcu_read_unlock(); - put_page(page); - - goto again; - } - - /* Should be impossible but lets be paranoid for now */ - if (WARN_ON_ONCE(inode->i_mapping != mapping)) { - err = -EFAULT; - rcu_read_unlock(); - iput(inode); - - goto out; - } - key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = inode; + key->shared.i_seq = get_inode_sequence_number(inode); key->shared.pgoff = basepage_index(tail); rcu_read_unlock(); } + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + out: put_page(page); return err; diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 6e40ff6be083dab77ad46edc01dbd95e4c0a854f..291e0797125b694334db84869b5db3676531d335 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -109,9 +109,9 @@ static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos) { struct gcov_iterator *iter = data; + (*pos)++; if (gcov_iter_next(iter)) return NULL; - (*pos)++; return iter; } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 317fc759de76160061cda5699a63998d2b3e8dc0..9da08b53d06a20d8f452757c9721af7d40bee2ad 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -480,8 +481,22 @@ static bool irq_may_run(struct irq_desc *desc) * If the interrupt is not in progress and is not an armed * wakeup interrupt, proceed. */ - if (!irqd_has_set(&desc->irq_data, mask)) + if (!irqd_has_set(&desc->irq_data, mask)) { +#ifdef CONFIG_PM_SLEEP + if (unlikely(desc->no_suspend_depth && + irqd_is_wakeup_set(&desc->irq_data))) { + unsigned int irq = irq_desc_get_irq(desc); + const char *name = "(unnamed)"; + + if (desc->action && desc->action->name) + name = desc->action->name; + + log_abnormal_wakeup_reason("misconfigured IRQ %u %s", + irq, name); + } +#endif return true; + } /* * If the interrupt is an armed wakeup source, mark it pending diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4ef7f3b820ce6aab4629fcf421bf53c6f0972de7..5230c47fc43e1e3f124b1ed62fb7b43173c07ef4 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -119,8 +119,6 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq); - extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index b269ae16b10cdd84b322c638c73a9817a1d6575a..0d54f8256b9f401145e87ccd03820e676a2c8669 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1372,6 +1372,11 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { + if (!domain->ops->alloc) { + pr_debug("domain->ops->alloc() is NULL\n"); + return -ENOSYS; + } + return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } @@ -1409,11 +1414,6 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, return -EINVAL; } - if (!domain->ops->alloc) { - pr_debug("domain->ops->alloc() is NULL\n"); - return -ENOSYS; - } - if (realloc && irq_base >= 0) { virq = irq_base; } else { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9c86a3e451101feb0661982081ef21fcf489006c..5277949e82e09e1ecdc7a92f4d8471e1c4171248 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -224,7 +224,11 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if 
(desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); - schedule_work(&desc->affinity_notify->work); + if (!schedule_work(&desc->affinity_notify->work)) { + /* Work was already scheduled, drop our extra ref */ + kref_put(&desc->affinity_notify->kref, + desc->affinity_notify->release); + } } irqd_set(data, IRQD_AFFINITY_SET); @@ -324,7 +328,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) raw_spin_unlock_irqrestore(&desc->lock, flags); if (old_notify) { - cancel_work_sync(&old_notify->work); + if (cancel_work_sync(&old_notify->work)) { + /* Pending work had a ref, put that one too */ + kref_put(&old_notify->kref, old_notify->release); + } kref_put(&old_notify->kref, old_notify->release); } @@ -382,23 +389,9 @@ int irq_setup_affinity(struct irq_desc *desc) { return irq_select_affinity(irq_desc_get_irq(desc)); } -#endif +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ -/* - * Called when a bogus affinity is set via /proc/irq - */ -int irq_select_affinity_usr(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_setup_affinity(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; -} -#endif /** * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d4e8d20dd64e39af8ca0e99043ad5f59379f98e0..374108c5bbdeffd0c8415a8718dea50fe88986c1 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -117,6 +117,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v) return show_irq_affinity(AFFINITY_LIST, m); } +#ifndef CONFIG_AUTO_IRQ_AFFINITY +static inline int irq_select_affinity_usr(unsigned int irq) +{ + /* + * If the interrupt is already started up then this fails. The + * interrupt is already assigned to an online CPU. There is no + * point in moving it around randomly. Tell user space that the + * selected mask is bogus. + * + * If not, any change to the affinity is pointless because the + * startup code invokes irq_setup_affinity(), which will select + * an online CPU anyway. + */ + return -EINVAL; +} +#else +/* ALPHA magic affinity auto selector. Keep it for historical reasons. */ +static inline int irq_select_affinity_usr(unsigned int irq) +{ + return irq_select_affinity(irq); +} +#endif static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) diff --git a/kernel/kmod.c b/kernel/kmod.c index bc6addd9152b4db58c57757f1fe0a2911a16e459..a2de58de6ab6259c266e0901e3e9dd212074cf83 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -120,7 +120,7 @@ static int call_modprobe(char *module_name, int wait) * invoke it. * * If module auto-loading support is disabled then this function - * becomes a no-operation. + * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { @@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...)
WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) - return 0; + return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d0fe20a5475f722b0e7fa303b4d8ff08b8b3f647..66f1818d47620be11c91a742f975399be18c5db7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -523,6 +523,8 @@ static void do_unoptimize_kprobes(void) arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { + /* Switching from detour code to origin */ + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; /* Disarm probes if marked disabled */ if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); @@ -623,6 +625,18 @@ void wait_for_kprobe_optimizer(void) mutex_unlock(&kprobe_mutex); } +static bool optprobe_queued_unopt(struct optimized_kprobe *op) +{ + struct optimized_kprobe *_op; + + list_for_each_entry(_op, &unoptimizing_list, list) { + if (op == _op) + return true; + } + + return false; +} + /* Optimize kprobe if p is ready to be optimized */ static void optimize_kprobe(struct kprobe *p) { @@ -644,17 +658,21 @@ static void optimize_kprobe(struct kprobe *p) return; /* Check if it is already optimized. */ - if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) + if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) { + if (optprobe_queued_unopt(op)) { + /* This is under unoptimizing. Just dequeue the probe */ + list_del_init(&op->list); + } return; + } op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) - /* This is under unoptimizing. Just dequeue the probe */ - list_del_init(&op->list); - else { - list_add(&op->list, &optimizing_list); - kick_kprobe_optimizer(); - } + /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ + if (WARN_ON_ONCE(!list_empty(&op->list))) + return; + + list_add(&op->list, &optimizing_list); + kick_kprobe_optimizer(); } /* Short cut to direct unoptimizing */ @@ -662,6 +680,7 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) { lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); } @@ -675,31 +694,33 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; /* This is not an optprobe nor optimized */ op = container_of(p, struct optimized_kprobe, kp); - if (!kprobe_optimized(p)) { - /* Unoptimized or unoptimizing case */ - if (force && !list_empty(&op->list)) { - /* - * Only if this is unoptimizing kprobe and forced, - * forcibly unoptimize it. (No need to unoptimize - * unoptimized kprobe again :) - */ - list_del_init(&op->list); - force_unoptimize_kprobe(op); - } + if (!kprobe_optimized(p)) return; - } - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (!list_empty(&op->list)) { - /* Dequeue from the optimization queue */ - list_del_init(&op->list); + if (optprobe_queued_unopt(op)) { + /* Queued in unoptimizing queue */ + if (force) { + /* + * Forcibly unoptimize the kprobe here, and queue it + * in the freeing list for release afterwards. 
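+ * At this point op sits on the unoptimizing_list, so the
+ * list_move() below re-queues it onto freeing_list and the
+ * optimizer work then performs the actual release.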
+ */ + force_unoptimize_kprobe(op); + list_move(&op->list, &freeing_list); + } + } else { + /* Dequeue from the optimizing queue */ + list_del_init(&op->list); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + } return; } + /* Optimized kprobe case */ - if (force) + if (force) { /* Forcibly update the code: this is a special case */ force_unoptimize_kprobe(op); - else { + } else { list_add(&op->list, &unoptimizing_list); kick_kprobe_optimizer(); } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 90a3469a7a888780f5bdce835234d5849aa22d7a..03e3ab61a2edd6ff7645be5dcdd1c99694085983 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1297,9 +1297,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) this.class = class; raw_local_irq_save(flags); + current->lockdep_recursion = 1; arch_spin_lock(&lockdep_lock); ret = __lockdep_count_forward_deps(&this); arch_spin_unlock(&lockdep_lock); + current->lockdep_recursion = 0; raw_local_irq_restore(flags); return ret; @@ -1324,9 +1326,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) this.class = class; raw_local_irq_save(flags); + current->lockdep_recursion = 1; arch_spin_lock(&lockdep_lock); ret = __lockdep_count_backward_deps(&this); arch_spin_unlock(&lockdep_lock); + current->lockdep_recursion = 0; raw_local_irq_restore(flags); return ret; diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 6dca260eeccf29bca8ccd92ba543e6e262c6c32b..032868be325940e30554ad00c8fdb8f7aa921303 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -723,10 +723,10 @@ static void __torture_print_stats(char *page, if (statp[i].n_lock_fail) fail = true; sum += statp[i].n_lock_acquired; - if (max < statp[i].n_lock_fail) - max = statp[i].n_lock_fail; - if (min > statp[i].n_lock_fail) - min = statp[i].n_lock_fail; + if (max < statp[i].n_lock_acquired) + max = statp[i].n_lock_acquired; + if (min > statp[i].n_lock_acquired) + min = statp[i].n_lock_acquired; } page += sprintf(page, "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", diff --git a/kernel/notifier.c b/kernel/notifier.c index 6196af8a82230024ba98647c7fe946bf842bd102..59a1e9b48a6af80d834c8fc764d07da0572cc4fe 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -552,7 +552,7 @@ NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { - vmalloc_sync_all(); + vmalloc_sync_mappings(); return atomic_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); diff --git a/kernel/padata.c b/kernel/padata.c index 87540ce72aea6bcef0126e8d172b2b8cd8d6f80f..a71620d2b8bab46d81d4f590a6212a7f010d1eba 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -34,6 +34,8 @@ #define MAX_OBJ_NUM 1000 +static void padata_free_pd(struct parallel_data *pd); + static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { int cpu, target_cpu; @@ -292,6 +294,7 @@ static void padata_serial_worker(struct work_struct *serial_work) struct padata_serial_queue *squeue; struct parallel_data *pd; LIST_HEAD(local_list); + int cnt; local_bh_disable(); squeue = container_of(serial_work, struct padata_serial_queue, work); @@ -301,6 +304,8 @@ static void padata_serial_worker(struct work_struct *serial_work) list_replace_init(&squeue->serial.list, &local_list); spin_unlock(&squeue->serial.lock); + cnt = 0; + while (!list_empty(&local_list)) { struct padata_priv *padata; @@ -310,9 +315,12 @@ static void padata_serial_worker(struct work_struct *serial_work) 
list_del_init(&padata->list); padata->serial(padata); - atomic_dec(&pd->refcnt); + cnt++; } local_bh_enable(); + + if (atomic_sub_and_test(cnt, &pd->refcnt)) + padata_free_pd(pd); } /** @@ -435,7 +443,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); + atomic_set(&pd->refcnt, 1); pd->pinst = pinst; spin_lock_init(&pd->lock); @@ -460,31 +468,6 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } -/* Flush all objects out of the padata queues. */ -static void padata_flush_queues(struct parallel_data *pd) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - flush_work(&pqueue->work); - } - - del_timer_sync(&pd->timer); - - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - flush_work(&squeue->work); - } - - BUG_ON(atomic_read(&pd->refcnt) != 0); -} - static void __padata_start(struct padata_instance *pinst) { pinst->flags |= PADATA_INIT; @@ -498,10 +481,6 @@ static void __padata_stop(struct padata_instance *pinst) pinst->flags &= ~PADATA_INIT; synchronize_rcu(); - - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); } /* Replace the internal control structure with a new one. */ @@ -522,8 +501,8 @@ static void padata_replace(struct padata_instance *pinst, if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) notification_mask |= PADATA_CPU_SERIAL; - padata_flush_queues(pd_old); - padata_free_pd(pd_old); + if (atomic_dec_and_test(&pd_old->refcnt)) + padata_free_pd(pd_old); if (notification_mask) blocking_notifier_call_chain(&pinst->cpumask_change_notifier, @@ -626,8 +605,8 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, struct cpumask *serial_mask, *parallel_mask; int err = -EINVAL; - mutex_lock(&pinst->lock); get_online_cpus(); + mutex_lock(&pinst->lock); switch (cpumask_type) { case PADATA_CPU_PARALLEL: @@ -645,8 +624,8 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask); out: - put_online_cpus(); mutex_unlock(&pinst->lock); + put_online_cpus(); return err; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 376716bfd057e6a0b360a56315bda7e0928e9a03..4b007fd99af29a744ff6b6e625d4d08a7ac3a268 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -897,6 +897,13 @@ static int software_resume(void) error = freeze_processes(); if (error) goto Close_Finish; + + error = freeze_kernel_threads(); + if (error) { + thaw_processes(); + goto Close_Finish; + } + error = load_image_and_restore(); thaw_processes(); place_marker("M - PM: Thaw processes completed!"); diff --git a/kernel/power/process.c b/kernel/power/process.c index 659cd91f1c6529904584878cf5db7c2d15bbe5d5..1ae7f93efde952ecaaa285abf37ae538c91089a8 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -23,7 +23,6 @@ #include #include #include -#include /* * Timeout for stopping processes @@ -40,9 +39,6 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; -#ifdef CONFIG_PM_SLEEP - char suspend_abort[MAX_SUSPEND_ABORT_LEN]; -#endif start = 
ktime_get_boottime(); @@ -72,11 +68,6 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { -#ifdef CONFIG_PM_SLEEP - pm_get_active_wakeup_sources(suspend_abort, - MAX_SUSPEND_ABORT_LEN); - log_suspend_abort_reason(suspend_abort); -#endif wakeup = true; break; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index ad7e726085be448d61a3e43c8441c620d68d0e2f..2ac0ed3b08167cd2ef134ad2dc73b9ce6230bf7a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -149,6 +149,7 @@ static void s2idle_loop(void) break; pm_wakeup_clear(false); + clear_wakeup_reasons(); } pm_pr_dbg("resume from suspend-to-idle\n"); @@ -362,6 +363,7 @@ static int suspend_prepare(suspend_state_t state) if (!error) return 0; + log_suspend_abort_reason("One or more tasks refusing to freeze"); suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); Finish: @@ -391,7 +393,6 @@ void __weak arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state, bool *wakeup) { - char suspend_abort[MAX_SUSPEND_ABORT_LEN]; int error, last_dev; error = platform_suspend_prepare(state); @@ -403,8 +404,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; last_dev %= REC_FAILED_NUM; pr_err("late suspend of devices failed\n"); - log_suspend_abort_reason("%s device failed to power down", - suspend_stats.failed_devs[last_dev]); + log_suspend_abort_reason("late suspend of %s device failed", + suspend_stats.failed_devs[last_dev]); goto Platform_finish; } error = platform_suspend_prepare_late(state); @@ -422,7 +423,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) last_dev %= REC_FAILED_NUM; pr_err("noirq suspend of devices failed\n"); log_suspend_abort_reason("noirq suspend of %s device failed", - suspend_stats.failed_devs[last_dev]); + suspend_stats.failed_devs[last_dev]); goto Platform_early_resume; } error = platform_suspend_prepare_noirq(state); @@ -451,9 +452,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) trace_suspend_resume(TPS("machine_suspend"), state, false); } else if (*wakeup) { - pm_get_active_wakeup_sources(suspend_abort, - MAX_SUSPEND_ABORT_LEN); - log_suspend_abort_reason(suspend_abort); error = -EBUSY; } syscore_resume(); @@ -486,7 +484,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) */ int suspend_devices_and_enter(suspend_state_t state) { - int error, last_dev; + int error; bool wakeup = false; if (!sleep_state_supported(state)) @@ -502,11 +500,9 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { - last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; - last_dev %= REC_FAILED_NUM; pr_err("Some devices failed to suspend, or early wake event detected\n"); - log_suspend_abort_reason("%s device failed to suspend, or early wake event detected", - suspend_stats.failed_devs[last_dev]); + log_suspend_abort_reason( + "Some devices failed to suspend, or early wake event detected"); goto Recover_platform; } suspend_test_finish("suspend devices"); diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 252611fad2fee221c0cee0f3dc563bdd88a4fc3a..3c118c0446330f4b43a93811da79828edf6e2288 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -4,7 +4,7 @@ * Logs the reasons which caused the kernel to resume from * the suspend mode. * - * Copyright (C) 2014 Google, Inc. 
+ * Copyright (C) 2020 Google, Inc. * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and * may be copied, distributed, and modified under those terms. @@ -26,70 +26,315 @@ #include #include #include +#include +/* + * struct wakeup_irq_node - stores data and relationships for IRQs logged as + * either base or nested wakeup reasons during suspend/resume flow. + * @siblings - for membership on leaf or parent IRQ lists + * @irq - the IRQ number + * @irq_name - the name associated with the IRQ, or a default if none + */ +struct wakeup_irq_node { + struct list_head siblings; + int irq; + const char *irq_name; +}; + +static DEFINE_SPINLOCK(wakeup_reason_lock); + +static LIST_HEAD(leaf_irqs); /* kept in ascending IRQ sorted order */ +static LIST_HEAD(parent_irqs); /* unordered */ -#define MAX_WAKEUP_REASON_IRQS 32 -static int irq_list[MAX_WAKEUP_REASON_IRQS]; -static int irqcount; +static struct kmem_cache *wakeup_irq_nodes_cache; + +static const char *default_irq_name = "(unnamed)"; + +static struct kobject *kobj; + +static bool capture_reasons; static bool suspend_abort; -static char abort_reason[MAX_SUSPEND_ABORT_LEN]; -static struct kobject *wakeup_reason; -static DEFINE_SPINLOCK(resume_reason_lock); +static bool abnormal_wake; +static char non_irq_wake_reason[MAX_SUSPEND_ABORT_LEN]; static ktime_t last_monotime; /* monotonic time before last suspend */ static ktime_t curr_monotime; /* monotonic time after last suspend */ static ktime_t last_stime; /* monotonic boottime offset before last suspend */ static ktime_t curr_stime; /* monotonic boottime offset after last suspend */ -static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) +static void init_node(struct wakeup_irq_node *p, int irq) { - int irq_no, buf_offset = 0; struct irq_desc *desc; - spin_lock(&resume_reason_lock); - if (suspend_abort) { - buf_offset = sprintf(buf, "Abort: %s", abort_reason); - } else { - for (irq_no = 0; irq_no < irqcount; irq_no++) { - desc = irq_to_desc(irq_list[irq_no]); - if (desc && desc->action && desc->action->name) - buf_offset += sprintf(buf + buf_offset, "%d %s\n", - irq_list[irq_no], desc->action->name); + + INIT_LIST_HEAD(&p->siblings); + + p->irq = irq; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + p->irq_name = desc->action->name; + else + p->irq_name = default_irq_name; +} + +static struct wakeup_irq_node *create_node(int irq) +{ + struct wakeup_irq_node *result; + + result = kmem_cache_alloc(wakeup_irq_nodes_cache, GFP_ATOMIC); + if (unlikely(!result)) + pr_warn("Failed to log wakeup IRQ %d\n", irq); + else + init_node(result, irq); + + return result; +} + +static void delete_list(struct list_head *head) +{ + struct wakeup_irq_node *n; + + while (!list_empty(head)) { + n = list_first_entry(head, struct wakeup_irq_node, siblings); + list_del(&n->siblings); + kmem_cache_free(wakeup_irq_nodes_cache, n); + } +} + +static bool add_sibling_node_sorted(struct list_head *head, int irq) +{ + struct wakeup_irq_node *n = NULL; + struct list_head *predecessor = head; + + if (unlikely(WARN_ON(!head))) + return false; + + if (!list_empty(head)) + list_for_each_entry(n, head, siblings) { + if (n->irq < irq) + predecessor = &n->siblings; + else if (n->irq == irq) + return true; else - buf_offset += sprintf(buf + buf_offset, "%d\n", - irq_list[irq_no]); + break; + } + + n = create_node(irq); + if (n) { + list_add(&n->siblings, predecessor); + return
true; + } + + return false; +} + +static struct wakeup_irq_node *find_node_in_list(struct list_head *head, + int irq) +{ + struct wakeup_irq_node *n; + + if (unlikely(WARN_ON(!head))) + return NULL; + + list_for_each_entry(n, head, siblings) + if (n->irq == irq) + return n; + + return NULL; +} + +void log_irq_wakeup_reason(int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + if (!capture_reasons) { + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + if (find_node_in_list(&parent_irqs, irq) == NULL) + add_sibling_node_sorted(&leaf_irqs, irq); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +void log_threaded_irq_wakeup_reason(int irq, int parent_irq) +{ + struct wakeup_irq_node *parent; + unsigned long flags; + + /* + * Intentionally unsynchronized. Calls that come in after we have + * resumed should have a fast exit path since there's no work to be + * done, and any coherence issue that could cause a wrong value here is + * both highly improbable (given the set/clear timing) and very low + * impact (parent IRQ gets logged instead of the specific child). + */ + if (!capture_reasons) + return; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + if (!capture_reasons || (find_node_in_list(&leaf_irqs, irq) != NULL)) { + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + parent = find_node_in_list(&parent_irqs, parent_irq); + if (parent != NULL) + add_sibling_node_sorted(&leaf_irqs, irq); + else { + parent = find_node_in_list(&leaf_irqs, parent_irq); + if (parent != NULL) { + list_del_init(&parent->siblings); + list_add_tail(&parent->siblings, &parent_irqs); + add_sibling_node_sorted(&leaf_irqs, irq); } } - spin_unlock(&resume_reason_lock); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +static void __log_abort_or_abnormal_wake(bool abort, const char *fmt, + va_list args) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + /* Suspend abort or abnormal wake reason has already been logged. */ + if (suspend_abort || abnormal_wake) { + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + suspend_abort = abort; + abnormal_wake = !abort; + vsnprintf(non_irq_wake_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +void log_suspend_abort_reason(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + __log_abort_or_abnormal_wake(true, fmt, args); + va_end(args); +} + +void log_abnormal_wakeup_reason(const char *fmt, ...)
+{ + va_list args; + + va_start(args, fmt); + __log_abort_or_abnormal_wake(false, fmt, args); + va_end(args); +} + +void clear_wakeup_reasons(void) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + delete_list(&leaf_irqs); + delete_list(&parent_irqs); + suspend_abort = false; + abnormal_wake = false; + capture_reasons = true; + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +static void print_wakeup_sources(void) +{ + struct wakeup_irq_node *n; + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + capture_reasons = false; + + if (suspend_abort) { + pr_info("Abort: %s\n", non_irq_wake_reason); + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return; + } + + if (!list_empty(&leaf_irqs)) + list_for_each_entry(n, &leaf_irqs, siblings) + pr_info("Resume caused by IRQ %d, %s\n", n->irq, + n->irq_name); + else if (abnormal_wake) + pr_info("Resume caused by %s\n", non_irq_wake_reason); + else + pr_info("Resume cause unknown\n"); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); +} + +static ssize_t last_resume_reason_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + ssize_t buf_offset = 0; + struct wakeup_irq_node *n; + unsigned long flags; + + spin_lock_irqsave(&wakeup_reason_lock, flags); + + if (suspend_abort) { + buf_offset = scnprintf(buf, PAGE_SIZE, "Abort: %s", + non_irq_wake_reason); + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return buf_offset; + } + + if (!list_empty(&leaf_irqs)) + list_for_each_entry(n, &leaf_irqs, siblings) + buf_offset += scnprintf(buf + buf_offset, + PAGE_SIZE - buf_offset, + "%d %s\n", n->irq, n->irq_name); + else if (abnormal_wake) + buf_offset = scnprintf(buf, PAGE_SIZE, "-1 %s", + non_irq_wake_reason); + + spin_unlock_irqrestore(&wakeup_reason_lock, flags); + return buf_offset; } static ssize_t last_suspend_time_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct timespec sleep_time; - struct timespec total_time; - struct timespec suspend_resume_time; + struct timespec64 sleep_time; + struct timespec64 total_time; + struct timespec64 suspend_resume_time; /* * total_time is calculated from monotonic boottime offsets because * unlike CLOCK_MONOTONIC it includes the time spent in suspend state. */ - total_time = ktime_to_timespec(ktime_sub(curr_stime, last_stime)); + total_time = ktime_to_timespec64(ktime_sub(curr_stime, last_stime)); /* * suspend_resume_time is calculated as monotonic (CLOCK_MONOTONIC) * time interval before entering suspend and post suspend. */ - suspend_resume_time = ktime_to_timespec(ktime_sub(curr_monotime, last_monotime)); + suspend_resume_time = + ktime_to_timespec64(ktime_sub(curr_monotime, last_monotime)); /* sleep_time = total_time - suspend_resume_time */ - sleep_time = timespec_sub(total_time, suspend_resume_time); + sleep_time = timespec64_sub(total_time, suspend_resume_time); /* Export suspend_resume_time and sleep_time in pair here.
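 * (Illustrative example: if the boottime clock advanced 100.0s across
 * the suspend cycle while the monotonic clock advanced 2.5s, then
 * sleep_time is 100.0 - 2.5 = 97.5s.)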
*/ - return sprintf(buf, "%lu.%09lu %lu.%09lu\n", - suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, - sleep_time.tv_sec, sleep_time.tv_nsec); + return sprintf(buf, "%llu.%09lu %llu.%09lu\n", + (unsigned long long)suspend_resume_time.tv_sec, + suspend_resume_time.tv_nsec, + (unsigned long long)sleep_time.tv_sec, + sleep_time.tv_nsec); } static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); @@ -104,86 +349,24 @@ static struct attribute_group attr_group = { .attrs = attrs, }; -/* - * logs all the wake up reasons to the kernel - * stores the irqs to expose them to the userspace via sysfs - */ -void log_wakeup_reason(int irq) -{ - struct irq_desc *desc; - desc = irq_to_desc(irq); - if (desc && desc->action && desc->action->name) - printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, - desc->action->name); - else - printk(KERN_INFO "Resume caused by IRQ %d\n", irq); - - spin_lock(&resume_reason_lock); - if (irqcount == MAX_WAKEUP_REASON_IRQS) { - spin_unlock(&resume_reason_lock); - printk(KERN_WARNING "Resume caused by more than %d IRQs\n", - MAX_WAKEUP_REASON_IRQS); - return; - } - - irq_list[irqcount++] = irq; - spin_unlock(&resume_reason_lock); -} - -int check_wakeup_reason(int irq) -{ - int irq_no; - int ret = false; - - spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irqcount; irq_no++) - if (irq_list[irq_no] == irq) { - ret = true; - break; - } - spin_unlock(&resume_reason_lock); - return ret; -} - -void log_suspend_abort_reason(const char *fmt, ...) -{ - va_list args; - - spin_lock(&resume_reason_lock); - - //Suspend abort reason has already been logged. - if (suspend_abort) { - spin_unlock(&resume_reason_lock); - return; - } - - suspend_abort = true; - va_start(args, fmt); - vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); - va_end(args); - spin_unlock(&resume_reason_lock); -} - /* Detects a suspend and clears all the previous wake up reasons */ static int wakeup_reason_pm_event(struct notifier_block *notifier, unsigned long pm_event, void *unused) { switch (pm_event) { case PM_SUSPEND_PREPARE: - spin_lock(&resume_reason_lock); - irqcount = 0; - suspend_abort = false; - spin_unlock(&resume_reason_lock); /* monotonic time since boot */ last_monotime = ktime_get(); /* monotonic time since boot including the time spent in suspend */ last_stime = ktime_get_boottime(); + clear_wakeup_reasons(); break; case PM_POST_SUSPEND: /* monotonic time since boot */ curr_monotime = ktime_get(); /* monotonic time since boot including the time spent in suspend */ curr_stime = ktime_get_boottime(); + print_wakeup_sources(); break; default: break; @@ -195,31 +378,40 @@ static struct notifier_block wakeup_reason_pm_notifier_block = { .notifier_call = wakeup_reason_pm_event, }; -/* Initializes the sysfs parameter - * registers the pm_event notifier - */ -int __init wakeup_reason_init(void) +static int __init wakeup_reason_init(void) { - int retval; - - retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); - if (retval) - printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", - __func__, retval); + if (register_pm_notifier(&wakeup_reason_pm_notifier_block)) { + pr_warn("[%s] failed to register PM notifier\n", __func__); + goto fail; + } - wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); - if (!wakeup_reason) { - printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", - __func__); - return 1; + kobj = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!kobj) { + pr_warn("[%s] failed to create a sysfs
kobject\n", __func__); + goto fail_unregister_pm_notifier; } - retval = sysfs_create_group(wakeup_reason, &attr_group); - if (retval) { - kobject_put(wakeup_reason); - printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", - __func__, retval); + + if (sysfs_create_group(kobj, &attr_group)) { + pr_warn("[%s] failed to create a sysfs group\n", __func__); + goto fail_kobject_put; } + + wakeup_irq_nodes_cache = + kmem_cache_create("wakeup_irq_node_cache", + sizeof(struct wakeup_irq_node), 0, 0, NULL); + if (!wakeup_irq_nodes_cache) + goto fail_remove_group; + return 0; + +fail_remove_group: + sysfs_remove_group(kobj, &attr_group); +fail_kobject_put: + kobject_put(kobj); +fail_unregister_pm_notifier: + unregister_pm_notifier(&wakeup_reason_pm_notifier_block); +fail: + return 1; } late_initcall(wakeup_reason_init); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 3f85da02be4ea53230a2235b33832a07f7527267..bc4b72527b027b8f6c5af3f7c255ac726de9e806 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -186,7 +186,8 @@ static void group_init(struct psi_group *group) for_each_possible_cpu(cpu) seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); - group->avg_next_update = sched_clock() + psi_period; + group->avg_last_update = sched_clock(); + group->avg_next_update = group->avg_last_update + psi_period; INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); mutex_init(&group->avgs_lock); /* Init trigger-related members */ @@ -482,7 +483,7 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) u32 remaining; remaining = win->size - elapsed; - growth += div_u64(win->prev_growth * remaining, win->size); + growth += div64_u64(win->prev_growth * remaining, win->size); } return growth; @@ -1199,7 +1200,10 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; - buf_size = min(nbytes, (sizeof(buf) - 1)); + if (!nbytes) + return -EINVAL; + + buf_size = min(nbytes, sizeof(buf)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98aa8c1a6eb8076aebc60f7dc4823e91eccde418..dc63c7ae9e737a6e80fb7010f1e9b8c65c2b220d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -159,7 +159,13 @@ static inline void cpu_load_update_active(struct rq *this_rq) { } #ifdef CONFIG_64BIT # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) -# define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT) +# define scale_load_down(w) \ +({ \ + unsigned long __w = (w); \ + if (__w) \ + __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ + __w; \ +}) #else # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) # define scale_load(w) (w) diff --git a/kernel/signal.c b/kernel/signal.c index 84b028ece696c03a9c977eac152a7b506461a0e2..a067e2c8942beb177b7d82d2daaf8334b192b0e2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -383,27 +383,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi { struct sigqueue *q = NULL; struct user_struct *user; + int sigpending; /* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. + * + * NOTE! A pending signal will hold on to the user refcount, + * and we get/put the refcount only when the sigpending count + * changes from/to zero. 
*/ rcu_read_lock(); - user = get_uid(__task_cred(t)->user); - atomic_inc(&user->sigpending); + user = __task_cred(t)->user; + sigpending = atomic_inc_return(&user->sigpending); + if (sigpending == 1) + get_uid(user); rcu_read_unlock(); - if (override_rlimit || - atomic_read(&user->sigpending) <= - task_rlimit(t, RLIMIT_SIGPENDING)) { + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { - atomic_dec(&user->sigpending); - free_uid(user); + if (atomic_dec_and_test(&user->sigpending)) + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; @@ -417,8 +422,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - atomic_dec(&q->user->sigpending); - free_uid(q->user); + if (atomic_dec_and_test(&q->user->sigpending)) + free_uid(q->user); kmem_cache_free(sigqueue_cachep, q); } @@ -1688,7 +1693,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) * This is only possible if parent == real_parent. * Check if it has changed security domain. */ - if (tsk->parent_exec_id != tsk->parent->self_exec_id) + if (tsk->parent_exec_id != READ_ONCE(tsk->parent->self_exec_id)) sig = SIGCHLD; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0da27380fc79cfc5b79b113f74922aaacd899353..5cc253f8fbf73688b5ffbcbc55fc3d0dd5376c2b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6319,9 +6319,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aea0f442516e0113aa79a7f7606f184810d9efd4..04b5e4cef44de25cc940e9909b38c025c9bae138 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1544,6 +1544,7 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); + tracing_selftest_running = true; list_for_each_entry_safe(p, n, &postponed_selftests, list) { ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ @@ -1562,6 +1563,7 @@ static __init int init_trace_selftests(void) list_del(&p->list); kfree(p); } + tracing_selftest_running = false; out: mutex_unlock(&trace_types_lock); @@ -7729,6 +7731,7 @@ static int instance_mkdir(const char *name) struct trace_array *tr; int ret; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -EEXIST; @@ -7784,6 +7787,7 @@ static int instance_mkdir(const char *name) list_add(&tr->list, &ftrace_trace_arrays); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; @@ -7795,6 +7799,7 @@ static int instance_mkdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; @@ -7807,6 +7812,7 @@ static int instance_rmdir(const char *name) int ret; int i; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -ENODEV; @@ -7852,6 +7858,7 @@ static int instance_rmdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2b0a01b2be2d1d05445cb954630ff6d59fe95dad..421166a39253aa7c23c53bb1fee60977db44fd6f 100644 --- a/kernel/trace/trace_events.c +++ 
b/kernel/trace/trace_events.c @@ -1403,8 +1403,8 @@ static int subsystem_open(struct inode *inode, struct file *filp) return -ENODEV; /* Make sure the system still exists */ - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { if (dir == inode->i_private) { @@ -1418,8 +1418,8 @@ static int subsystem_open(struct inode *inode, struct file *filp) } } exit_loop: - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); if (!system) return -ENODEV; @@ -2305,15 +2305,15 @@ static void __add_event_to_tracers(struct trace_event_call *call); int trace_add_event_call(struct trace_event_call *call) { int ret; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); ret = __register_event(call, NULL); if (ret >= 0) __add_event_to_tracers(call); - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } @@ -2367,13 +2367,13 @@ int trace_remove_event_call(struct trace_event_call *call) { int ret; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); down_write(&trace_event_sem); ret = probe_remove_event_call(call); up_write(&trace_event_sem); - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } @@ -2435,8 +2435,8 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); switch (val) { case MODULE_STATE_COMING: trace_module_add_events(mod); @@ -2445,8 +2445,8 @@ static int trace_module_notify(struct notifier_block *self, trace_module_remove_events(mod); break; } - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; } @@ -2961,24 +2961,24 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) * creates the event hierarchy in the @parent/events directory. * * Returns 0 on success. + * + * Must be called with event_mutex held.
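+ * Note the lock order used throughout this file: event_mutex is
+ * taken first, then trace_types_lock, so callers already holding
+ * event_mutex may still take trace_types_lock but never the
+ * reverse.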
*/ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) - goto out_unlock; + goto out; down_write(&trace_event_sem); __trace_add_event_dirs(tr); up_write(&trace_event_sem); - out_unlock: - mutex_unlock(&event_mutex); - + out: return ret; } @@ -3007,9 +3007,10 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) return ret; } +/* Must be called with event_mutex held */ int event_trace_del_tracer(struct trace_array *tr) { - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); /* Disable any event triggers and associated soft-disabled events */ clear_event_triggers(tr); @@ -3030,8 +3031,6 @@ int event_trace_del_tracer(struct trace_array *tr) tr->event_dir = NULL; - mutex_unlock(&event_mutex); - return 0; } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index e2da180ca172a6418b448253e244a1dd5ef61ca3..6fb5eb7b57dc03eb5b7de6c4253e7ea8b2d89a0c 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -127,9 +127,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } @@ -1074,14 +1075,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { - int ret = register_trigger(glob, ops, data, file); - - if (ret > 0 && tracing_alloc_snapshot_instance(file->tr) != 0) { - unregister_trigger(glob, ops, data, file); - ret = 0; - } + if (tracing_alloc_snapshot_instance(file->tr) != 0) + return 0; - return ret; + return register_trigger(glob, ops, data, file); } static int diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0c23b5615977fa34a6c09f67354bf24646fe5b0b..b0db2e4cefa3570dddd4b49912c9853e3b49a09c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -877,6 +877,8 @@ static int probes_seq_show(struct seq_file *m, void *v) int i; seq_putc(m, trace_kprobe_is_return(tk) ? 
'r' : 'p'); + if (trace_kprobe_is_return(tk) && tk->rp.maxactive) + seq_printf(m, "%d", tk->rp.maxactive); seq_printf(m, ":%s/%s", tk->tp.call.class->system, trace_event_name(&tk->tp.call)); diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 75bf1bcb4a8a56164641549f51bbec41584afe24..92b76f9e25eddd71ef4760ca2cce00504236b18d 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -278,18 +278,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, @@ -302,7 +306,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -313,17 +317,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -332,15 +334,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 45ea3a8853cc798f770648c7bebebd63fec6c7a4..1f81cb57eafc3c5ad1249a1a9ef5a345acb48c4d 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -165,6 +165,8 @@ static void lockup_detector_update_enable(void) #ifdef CONFIG_SOFTLOCKUP_DETECTOR +#define SOFTLOCKUP_RESET ULONG_MAX + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -273,7 +275,7 @@ notrace void touch_softlockup_watchdog_sched(void) * Preemption can be enabled. It doesn't matter which CPU's timestamp * gets reset here, so use the raw_ operation. */ - raw_cpu_write(watchdog_touch_ts, 0); + raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } notrace void touch_softlockup_watchdog(void) @@ -297,14 +299,14 @@ void touch_all_softlockup_watchdogs(void) * the softlockup check.
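 * (The touch marker here is SOFTLOCKUP_RESET, i.e. ULONG_MAX, rather
 * than 0; a timestamp of 0 is a value the clock can legitimately
 * report early during boot, so it cannot double as the marker.)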
*/ for_each_cpu(cpu, &watchdog_allowed_mask) - per_cpu(watchdog_touch_ts, cpu) = 0; + per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET; wq_watchdog_touch(-1); } void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); - __this_cpu_write(watchdog_touch_ts, 0); + __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } static int is_softlockup(unsigned long touch_ts) @@ -356,7 +358,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); - if (touch_ts == 0) { + if (touch_ts == SOFTLOCKUP_RESET) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 25053738531c538da6d20b21d0846787ae71ffaf..f93f40dd7aaf2c75a42bd952f21c29bea37ce409 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1414,14 +1414,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; retry: - if (req_cpu == WORK_CPU_UNBOUND) - cpu = wq_select_unbound_cpu(raw_smp_processor_id()); - /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) - pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - else + if (wq->flags & WQ_UNBOUND) { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = wq_select_unbound_cpu(raw_smp_processor_id()); pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); + } else { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + } /* * If @work was previously on a different pool, it might still be @@ -1538,8 +1540,10 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); - WARN_ON_ONCE(timer->function != delayed_work_timer_fn || - timer->data != (unsigned long)dwork); +#ifndef CONFIG_CFI_CLANG + WARN_ON_ONCE(timer->function != delayed_work_timer_fn); +#endif + WARN_ON_ONCE(timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); diff --git a/lib/Makefile b/lib/Makefile index ea43d7b99fc95127721c7d9d54bd62c61c68f7ae..ec649b0b711e1837a2a6ffaf2953c04a3a4205f5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +CFLAGS_test_stackinit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o diff --git a/lib/find_bit.c b/lib/find_bit.c index 6ed74f78380ce1f6e69568fad0c28d685a5f2048..883ef3755a1cb390ae06efab84a483f4978bd574 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -133,18 +133,6 @@ EXPORT_SYMBOL(find_last_bit); #ifdef __BIG_ENDIAN -/* include/linux/byteorder does not support "unsigned long" type */ -static inline unsigned long ext2_swab(const unsigned long y) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64((u64) y); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32((u32) y); -#else -#error BITS_PER_LONG not defined -#endif -} - #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) static unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long nbits, unsigned long start, unsigned long invert) @@ -157,7 +145,7 @@ static unsigned long _find_next_bit_le(const unsigned long 
*addr, tmp = addr[start / BITS_PER_LONG] ^ invert; /* Handle 1st word. */ - tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); + tmp &= swab(BITMAP_FIRST_WORD_MASK(start)); start = round_down(start, BITS_PER_LONG); while (!tmp) { @@ -168,7 +156,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, tmp = addr[start / BITS_PER_LONG] ^ invert; } - return min(start + __ffs(ext2_swab(tmp)), nbits); + return min(start + __ffs(swab(tmp)), nbits); } #endif diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 08c60d10747fddc204788e75f24107805702caf4..e01b705556aa68fb156d4e481c07c9677dde83d8 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -756,22 +756,22 @@ do { \ do { \ if (__builtin_constant_p(bh) && (bh) == 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "%r" ((USItype)(ah)), \ "%r" ((USItype)(al)), \ "rI" ((USItype)(bl))); \ else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "%r" ((USItype)(ah)), \ "%r" ((USItype)(al)), \ "rI" ((USItype)(bl))); \ else \ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "%r" ((USItype)(ah)), \ "r" ((USItype)(bh)), \ "%r" ((USItype)(al)), \ @@ -781,36 +781,36 @@ do { \ do { \ if (__builtin_constant_p(ah) && (ah) == 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p(bh) && (bh) == 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(ah)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(ah)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else \ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(ah)), \ "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ @@ -821,7 +821,7 @@ do { \ do { \ USItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhwu %0,%1,%2" \ - : "=r" ((USItype) ph) \ + : "=r" (ph) \ : "%r" (__m0), \ "r" (__m1)); \ (pl) = __m0 * __m1; \ diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc index d5242f5445511784bc821f3cd64a4487f2673c97..b7c68030da4fd94d5e6bf3efca157127288e9f9a 100644 --- a/lib/raid6/neon.uc +++ b/lib/raid6/neon.uc @@ -28,7 +28,6 @@ typedef uint8x16_t unative_t; -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) #define NSIZE sizeof(unative_t) /* @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t 
x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = disks - 3; /* Highest data disk */ p = dptr[z0+1]; /* XOR parity */ @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop, int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = stop; /* P/Q right side optimization */ p = dptr[disks-2]; /* XOR parity */ diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c index 8cd20c9f834a1e8cc90e9a4f783f74a4244a5f7a..7d00c31a654706763eb29da6952bb7033e635eee 100644 --- a/lib/raid6/recov_neon_inner.c +++ b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@ #include -static const uint8x16_t x0f = { - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; - #ifdef CONFIG_ARM /* * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, uint8x16_t pm1 = vld1q_u8(pbmul + 16); uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while ( bytes-- ) { @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, { uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while (bytes--) { diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 11fce289d116675c341478893a5ad263c87c4793..834c846c5af84f1552a81f2f9dc5b0b79ad4840b 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -317,7 +317,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (prv) table->nents = ++table->orig_nents; - return -ENOMEM; + return -ENOMEM; } sg_init_table(sg, alloc_size); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index da258c507675673706a993d1062c75c6e8e4c089..be32f766ab940b55285706e70de8a4f1bb8f0491 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -92,15 +92,19 @@ static bool init_stack_slab(void **prealloc) return true; if (stack_slabs[depot_index] == NULL) { stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; } else { - stack_slabs[depot_index + 1] = *prealloc; + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + } /* * This smp_store_release pairs with smp_load_acquire() from * |next_slab_inited| above and in depot_save_stack(). */ smp_store_release(&next_slab_inited, 1); } - *prealloc = NULL; return true; } diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index 7c5f9fb49e58795f2e9801ddd1388334a94f3fe1..c589bfa120f371cb612e5f0fe80775423bfed60a 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -92,8 +92,9 @@ static bool range_contains(char *haystack_start, size_t haystack_size, * @var_type: type to be tested for zeroing initialization * @which: is this a SCALAR, STRING, or STRUCT type? * @init_level: what kind of initialization is performed + * @xfail: is this test expected to fail? */ -#define DEFINE_TEST_DRIVER(name, var_type, which) \ +#define DEFINE_TEST_DRIVER(name, var_type, which, xfail) \ /* Returns 0 on success, 1 on failure. 
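Stripped of the macro plumbing, the reporting logic that DEFINE_TEST_DRIVER() expands to looks roughly like this sketch (assume sum counts the bytes the compiler failed to zero-initialize):

    /* expected-failure aware reporting: an xfail test that fails
     * still returns success, so the overall run stays green
     */
    static int report_result(const char *name, int sum, bool xfail)
    {
            if (sum == 0) {
                    pr_info("%s ok\n", name);
                    return 0;
            }
            pr_warn("%s %sFAIL (uninit bytes: %d)\n",
                    name, xfail ? "X" : "", sum);
            return xfail ? 0 : 1;
    }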
*/ \ static noinline __init int test_ ## name (void) \ { \ @@ -139,13 +140,14 @@ static noinline __init int test_ ## name (void) \ for (sum = 0, i = 0; i < target_size; i++) \ sum += (check_buf[i] == 0xFF); \ \ - if (sum == 0) \ + if (sum == 0) { \ pr_info(#name " ok\n"); \ - else \ - pr_warn(#name " FAIL (uninit bytes: %d)\n", \ - sum); \ - \ - return (sum != 0); \ + return 0; \ + } else { \ + pr_warn(#name " %sFAIL (uninit bytes: %d)\n", \ + (xfail) ? "X" : "", sum); \ + return (xfail) ? 0 : 1; \ + } \ } #define DEFINE_TEST(name, var_type, which, init_level) \ /* no-op to force compiler into ignoring "uninitialized" vars */\ @@ -189,7 +191,7 @@ static noinline __init int leaf_ ## name(unsigned long sp, \ \ return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ } \ -DEFINE_TEST_DRIVER(name, var_type, which) +DEFINE_TEST_DRIVER(name, var_type, which, 0) /* Structure with no padding. */ struct test_packed { @@ -282,8 +284,9 @@ DEFINE_TEST(user, struct test_user, STRUCT, none); */ static int noinline __leaf_switch_none(int path, bool fill) { + uint64_t var; + switch (path) { - uint64_t var; case 1: target_start = &var; @@ -326,8 +329,14 @@ static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, return __leaf_switch_none(2, fill); } -DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR); -DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR); +/* + * These are expected to fail for most configurations because neither + * GCC nor Clang has a way to perform initialization of variables in + * non-code areas (i.e. in a switch statement before the first "case"). + * https://bugs.llvm.org/show_bug.cgi?id=44916 + */ +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, 1); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, 1); static int __init test_stackinit_init(void) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5c089f2d2d4c74f4226ebb81e6d01b66e6bc30d8..8847f84da129b3330f25287c330c822a91edeff6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -163,16 +163,13 @@ static ssize_t enabled_store(struct kobject *kobj, { ssize_t ret = count; - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); } else @@ -236,32 +233,27 @@ static ssize_t defrag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer+madvise", buf, - min(sizeof("defer+madvise")-1, count))) { + } else if (sysfs_streq(buf,
"defer+madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer", buf, - min(sizeof("defer")-1, count))) { + } else if (sysfs_streq(buf, "defer")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); @@ -2562,7 +2554,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) unsigned long flags; pgoff_t end; - VM_BUG_ON_PAGE(is_huge_zero_page(page), page); + VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9e8c023b11688295f9ae5e8a69b7870662d536ce..9ac4f76310cfd2b95739a9c98bcd452541f593be 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4747,8 +4747,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, { pgd_t *pgd; p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; + pud_t *pud, pud_entry; + pmd_t *pmd, pmd_entry; pgd = pgd_offset(mm, addr); if (!pgd_present(*pgd)) @@ -4758,17 +4758,19 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return NULL; pud = pud_offset(p4d, addr); - if (sz != PUD_SIZE && pud_none(*pud)) + pud_entry = READ_ONCE(*pud); + if (sz != PUD_SIZE && pud_none(pud_entry)) return NULL; /* hugepage or swap? */ - if (pud_huge(*pud) || !pud_present(*pud)) + if (pud_huge(pud_entry) || !pud_present(pud_entry)) return (pte_t *)pud; pmd = pmd_offset(pud, addr); - if (sz != PMD_SIZE && pmd_none(*pmd)) + pmd_entry = READ_ONCE(*pmd); + if (sz != PMD_SIZE && pmd_none(pmd_entry)) return NULL; /* hugepage or swap? 
*/ - if (pmd_huge(*pmd) || !pmd_present(*pmd)) + if (pmd_huge(pmd_entry) || !pmd_present(pmd_entry)) return (pte_t *)pmd; return NULL; diff --git a/mm/ksm.c b/mm/ksm.c index f288fc70c4462a3a2c40f4425e0d6d5925dcc92b..1ba1593726798e8fba52810f29f7b70ddc69b5c1 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2074,8 +2074,16 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) down_read(&mm->mmap_sem); vma = find_mergeable_vma(mm, rmap_item->address); - err = try_to_merge_one_page(vma, page, - ZERO_PAGE(rmap_item->address)); + if (vma) { + err = try_to_merge_one_page(vma, page, + ZERO_PAGE(rmap_item->address)); + } else { + /* + * If the vma is out of date, we do not need to + * continue. + */ + err = 0; + } up_read(&mm->mmap_sem); /* * In case of failure, the page was not really empty, so we diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1b709c775229b09ed0da043f4c652dd6b110bbae..082ac42cca234d259e9f932d23e7e4910f61af3a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3563,7 +3563,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; unsigned long usage; - int i, j, size; + int i, j, size, entries; mutex_lock(&memcg->thresholds_lock); @@ -3583,14 +3583,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, __mem_cgroup_threshold(memcg, type == _MEMSWAP); /* Calculate new number of threshold */ - size = 0; + size = entries = 0; for (i = 0; i < thresholds->primary->size; i++) { if (thresholds->primary->entries[i].eventfd != eventfd) size++; + else + entries++; } new = thresholds->spare; + /* If no items related to eventfd have been cleared, nothing to do */ + if (!entries) + goto unlock; + /* Set thresholds array to NULL if we don't have thresholds */ if (!size) { kfree(new); @@ -5912,19 +5918,9 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - css_get(&sk->sk_memcg->css); + /* Do not associate the sock with unrelated interrupted task's memcg. */ + if (in_interrupt()) return; - } rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 6bc6c2f833a4a30819bc65796f6478380051c27c..b29d66b66dbe0c12c2b333dc5e80c84448b455e4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2769,7 +2769,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) switch (mode) { case MPOL_PREFERRED: /* - * Insist on a nodelist of one node only + * Insist on a nodelist of one node only, although later + * we use first_node(nodes) to grab a single node, so here + * nodelist (or nodes) cannot be empty. 
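A hedged sketch of the check the comment above describes (not the full mpol_parse_str() flow; assume nodes was already parsed from the nodelist string):

    /* MPOL_PREFERRED takes exactly one node, and the parsed mask must
     * not be empty: first_node() on an empty mask yields an
     * out-of-range node id that would later be used as an index
     */
    if (nodelist) {
            char *rest = nodelist;

            while (isdigit(*rest))
                    rest++;
            if (*rest)                      /* more than one node id */
                    return -EINVAL;
            if (nodes_empty(nodes))         /* e.g. "prefer:" alone */
                    return -EINVAL;
    }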
*/ if (nodelist) { char *rest = nodelist; @@ -2777,6 +2779,8 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) rest++; if (*rest) goto out; + if (nodes_empty(nodes)) + goto out; } break; case MPOL_INTERLEAVE: diff --git a/mm/mmap.c b/mm/mmap.c index 4c7f5ae61837ff2b186dc01972b54d80815708a4..573e0ac6f1f5b779e84bff02f0bab6d851b97a04 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -202,8 +202,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) bool populate; LIST_HEAD(uf); - brk = untagged_addr(brk); - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -1598,8 +1596,6 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, struct file *file = NULL; unsigned long retval; - addr = untagged_addr(addr); - if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); diff --git a/mm/mprotect.c b/mm/mprotect.c index 2fb38d9c7e123858770bb35249d20b20f90f4740..3979aa727f896316338e55ef93a30b38f2727fd8 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -148,6 +148,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return pages; } +/* + * Used when setting automatic NUMA hinting protection where it is + * critical that a numa hinting PMD is not confused with a bad PMD. + */ +static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +{ + pmd_t pmdval = pmd_read_atomic(pmd); + + /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +#endif + + if (pmd_none(pmdval)) + return 1; + if (pmd_trans_huge(pmdval)) + return 0; + if (unlikely(pmd_bad(pmdval))) { + pmd_clear_bad(pmd); + return 1; + } + + return 0; +} + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -164,8 +189,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) - && pmd_none_or_clear_bad(pmd)) + + /* + * Automatic NUMA balancing walks the tables with mmap_sem + * held for read. It's possible a parallel update to occur + * between pmd_trans_huge() and a pmd_none_or_clear_bad() + * check leading to a false positive and clearing. + * Hence, it's necessary to atomically read the PMD value + * for all the checks. + */ + if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && + pmd_none_or_clear_bad_unless_trans_huge(pmd)) goto next; /* invoke the mmu notifier if the pmd is populated */ diff --git a/mm/mremap.c b/mm/mremap.c index 9737d473089d3df70b60ff4fc60793ad04532efc..d18f8429596f6a81e99b2bc1b83012e15f1db77f 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -543,7 +543,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, LIST_HEAD(uf_unmap); addr = untagged_addr(addr); - new_addr = untagged_addr(new_addr); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; diff --git a/mm/nommu.c b/mm/nommu.c index 040bde3ef3f782ddde0fd184df13d6e9aa21a1dd..f430e87f4ea439d12c52a927090519d83f42dfbd 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -450,10 +450,14 @@ void vm_unmap_aliases(void) EXPORT_SYMBOL_GPL(vm_unmap_aliases); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement a stub for vmalloc_sync_[un]mapping() if the architecture + * chose not to have one. 
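The two __weak definitions follow the usual weak-symbol pattern: generic code ships an empty default, and any architecture that needs the hook overrides it with a strong definition at link time. A sketch (the arch-side body is a hypothetical placeholder):

    /* generic code: safe no-op default */
    void __weak vmalloc_sync_mappings(void)
    {
    }

    /* arch code: a strong definition of the same symbol silently
     * replaces the weak one, so call sites need no #ifdef
     */
    void vmalloc_sync_mappings(void)
    {
            arch_sync_vmalloc_pgds();       /* hypothetical helper */
    }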
*/ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) +{ +} + +void __weak vmalloc_sync_unmappings(void) { } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bfcbb43fd7d98061bebbb0df9200b7afec0a16f7..496ad8492c92c29660b8e0cc9071990be2c94772 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4534,11 +4534,11 @@ void *page_frag_alloc(struct page_frag_cache *nc, /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - page_ref_add(page, size); + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); - nc->pagecnt_bias = size + 1; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; nc->offset = size; } @@ -4554,10 +4554,10 @@ void *page_frag_alloc(struct page_frag_cache *nc, size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - set_page_count(page, size + 1); + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size + 1; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = size - fragsz; } diff --git a/mm/shmem.c b/mm/shmem.c index 393a674b92bf8965dab63e6848fe84a61c76a577..05399bbc865e9b4db126646005f4e75bd0aa2c47 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2352,11 +2352,11 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, lru_cache_add_anon(page); - spin_lock(&info->lock); + spin_lock_irq(&info->lock); info->alloced++; inode->i_blocks += BLOCKS_PER_PAGE; shmem_recalc_inode(inode); - spin_unlock(&info->lock); + spin_unlock_irq(&info->lock); inc_mm_counter(dst_mm, mm_counter_file(page)); page_add_file_rmap(page, false); diff --git a/mm/slub.c b/mm/slub.c index 5dcbbea0d57ab85056dd4ff2b19622fba8bd30f1..fd5f9337bc7a8accbfae91170915302fded168de 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -259,7 +259,7 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, * freepointer to be restored incorrectly. 
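What the added swab() buys is clearer in a standalone sketch of the hardened freelist encoding (simplified: the KASAN tag handling is dropped and the names are invented):

    /* XOR with a per-cache secret hides the pointer value; byte-swapping
     * the address it is stored at means an attacker who can corrupt only
     * the low bytes of the stored value no longer controls the low bytes
     * of the decoded pointer
     */
    static inline void *encode_free_ptr(void *ptr, unsigned long ptr_addr,
                                        unsigned long secret)
    {
            return (void *)((unsigned long)ptr ^ secret ^ swab(ptr_addr));
    }

    /* XOR is its own inverse, so decoding is the same operation */
    static inline void *decode_free_ptr(void *stored, unsigned long ptr_addr,
                                        unsigned long secret)
    {
            return encode_free_ptr(stored, ptr_addr, secret);
    }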
*/ return (void *)((unsigned long)ptr ^ s->random ^ - (unsigned long)kasan_reset_tag((void *)ptr_addr)); + swab((unsigned long)kasan_reset_tag((void *)ptr_addr))); #else return ptr; #endif @@ -280,8 +280,7 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) static void prefetch_freepointer(const struct kmem_cache *s, void *object) { - if (object) - prefetch(freelist_dereference(s, object + s->offset)); + prefetch(object + s->offset); } static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) @@ -2001,8 +2000,6 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, if (node == NUMA_NO_NODE) searchnode = numa_mem_id(); - else if (!node_present_pages(node)) - searchnode = node_to_mem_node(node); object = get_partial_node(s, get_node(s, searchnode), c, flags); if (object || node != NUMA_NO_NODE) @@ -2599,17 +2596,27 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, struct page *page; page = c->page; - if (!page) + if (!page) { + /* + * if the node is not online or has no normal memory, just + * ignore the node constraint + */ + if (unlikely(node != NUMA_NO_NODE && + !node_state(node, N_NORMAL_MEMORY))) + node = NUMA_NO_NODE; goto new_slab; + } redo: if (unlikely(!node_match(page, node))) { - int searchnode = node; - - if (node != NUMA_NO_NODE && !node_present_pages(node)) - searchnode = node_to_mem_node(node); - - if (unlikely(!node_match(page, searchnode))) { + /* + * same as above but node_match() being false already + * implies node != NUMA_NO_NODE + */ + if (!node_state(node, N_NORMAL_MEMORY)) { + node = NUMA_NO_NODE; + goto redo; + } else { stat(s, ALLOC_NODE_MISMATCH); deactivate_slab(s, page, c->freelist, c); goto new_slab; @@ -3034,11 +3041,13 @@ static __always_inline void do_slab_free(struct kmem_cache *s, barrier(); if (likely(page == c->page)) { - set_freepointer(s, tail_obj, c->freelist); + void **freelist = READ_ONCE(c->freelist); + + set_freepointer(s, tail_obj, freelist); if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, - c->freelist, tid, + freelist, tid, head, next_tid(tid)))) { note_cmpxchg_failure("slab_free", s, tid); @@ -3211,6 +3220,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { + /* + * We may have removed an object from c->freelist using + * the fastpath in the previous iteration; in that case, + * c->tid has not been bumped yet. + * Since ___slab_alloc() may reenable interrupts while + * allocating memory, we should bump c->tid now. + */ + c->tid = next_tid(c->tid); + /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e2d2ceb47075bf8b56676fee601af35b4b27ea17..7e3bcd86c95761439c6271b9bab023b0b39f2632 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1779,7 +1780,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); - area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. 
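The reordering that continues in the hunk below is the usual publish-after-success discipline: never let an error path see a half-initialized object. In sketch form (alloc_page_array() is a hypothetical stand-in for the two allocation branches):

    pages = alloc_page_array(array_size);   /* hypothetical helper */
    if (!pages) {
            /* area->pages and area->nr_pages were never set, so this
             * error path cannot trip over stale values
             */
            remove_vm_area(area->addr);
            kfree(area);
            return NULL;
    }

    /* publish into the long-lived object only after success */
    area->pages = pages;
    area->nr_pages = nr_pages;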
*/ if (array_size > PAGE_SIZE) { pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, @@ -1787,13 +1787,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } else { pages = kmalloc_node(array_size, nested_gfp, node); } - area->pages = pages; - if (!area->pages) { + + if (!pages) { remove_vm_area(area->addr); kfree(area); return NULL; } + area->pages = pages; + area->nr_pages = nr_pages; + for (i = 0; i < area->nr_pages; i++) { struct page *page; @@ -1868,7 +1871,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, * First make sure the mappings are removed from all page-tables * before they are freed. */ - vmalloc_sync_all(); + vmalloc_sync_unmappings(); /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED @@ -2343,6 +2346,7 @@ long vwrite(char *buf, char *addr, unsigned long count) * @vma: vma to cover * @uaddr: target user address to start at * @kaddr: virtual address of vmalloc kernel memory + * @pgoff: offset from @kaddr to start at * @size: size of map area * * Returns: 0 for success, -Exxx on failure @@ -2355,9 +2359,15 @@ long vwrite(char *buf, char *addr, unsigned long count) * Similar to remap_pfn_range() (see mm/memory.c) */ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, - void *kaddr, unsigned long size) + void *kaddr, unsigned long pgoff, + unsigned long size) { struct vm_struct *area; + unsigned long off; + unsigned long end_index; + + if (check_shl_overflow(pgoff, PAGE_SHIFT, &off)) + return -EINVAL; size = PAGE_ALIGN(size); @@ -2371,8 +2381,10 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, if (!(area->flags & VM_USERMAP)) return -EINVAL; - if (kaddr + size > area->addr + get_vm_area_size(area)) + if (check_add_overflow(size, off, &end_index) || + end_index > get_vm_area_size(area)) return -EINVAL; + kaddr += off; do { struct page *page = vmalloc_to_page(kaddr); @@ -2411,22 +2423,25 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff) { return remap_vmalloc_range_partial(vma, vma->vm_start, - addr + (pgoff << PAGE_SHIFT), + addr, pgoff, vma->vm_end - vma->vm_start); } EXPORT_SYMBOL(remap_vmalloc_range); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose + * not to have one. * * The purpose of this function is to make sure the vmalloc area * mappings are identical in all page-tables in the system. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) { } +void __weak vmalloc_sync_unmappings(void) +{ +} static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) { diff --git a/mm/vmscan.c b/mm/vmscan.c index d0709374d9fadbcda93ad9428e5dc4123173dca8..f60ba74a2da12a62c3680fcbe1059bbfe436724b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2444,10 +2444,13 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency. - * Make sure we don't miss the last page - * because of a round-off error. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. */ - scan = DIV64_U64_ROUND_UP(scan * fraction[file], + scan = mem_cgroup_online(memcg) ? 
+ div64_u64(scan * fraction[file], denominator) : + DIV64_U64_ROUND_UP(scan * fraction[file], denominator); break; case SCAN_FILE: diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 8b3f9441b3a01cd5732131226f9e911172b4086c..7a723e124dbb55eef70f3b433de6a5d1aa63558e 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -149,7 +150,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, - int max_if_num) + unsigned int max_if_num) { void *data_ptr; size_t old_size; @@ -193,7 +194,8 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, */ static void batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t chunk_size; size_t if_offset; @@ -231,7 +233,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, */ static void batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t if_offset; void *data_ptr; @@ -268,7 +271,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); @@ -302,7 +306,8 @@ static struct batadv_orig_node * batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; - int size, hash_added; + int hash_added; + size_t size; orig_node = batadv_orig_hash_find(bat_priv, addr); if (orig_node) @@ -366,14 +371,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff; u32 random_seqno; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); - if (!ogm_buff) + if (!ogm_buff) { + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return -ENOMEM; + } hard_iface->bat_iv.ogm_buff = ogm_buff; @@ -385,35 +394,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + return 0; } static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + kfree(hard_iface->bat_iv.ogm_buff); hard_iface->bat_iv.ogm_buff = NULL; + + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; 
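The B.A.T.M.A.N. IV hunks around here all repeat one shape, sketched below. A mutex rather than a spinlock fits because the buffer is created and resized with sleeping allocations:

    mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);

    ogm_buff = hard_iface->bat_iv.ogm_buff;
    if (!ogm_buff)          /* iface_disable() freed it concurrently */
            goto unlock;

    /* ... read or rewrite the OGM buffer ... */

    unlock:
    mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);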
ether_addr_copy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr); ether_addr_copy(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; batadv_ogm_packet->ttl = BATADV_TTL; + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /* when do we schedule our own ogm to be sent */ @@ -890,7 +923,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) u32 i; size_t word_index; u8 *w; - int if_num; + unsigned int if_num; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -911,7 +944,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) } } -static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +/** + * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer + * @hard_iface: interface whose ogm buffer should be transmitted + */ +static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; @@ -922,8 +959,10 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); + + /* interface already disabled by batadv_iv_ogm_iface_disable */ + if (!*ogm_buff) return; /* the interface gets activated here to avoid race conditions between @@ -992,6 +1031,17 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_hardif_put(primary_if); } +static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +{ + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) + return; + + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + batadv_iv_ogm_schedule_buff(hard_iface); + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); +} + /** * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an * originator @@ -1020,7 +1070,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - int if_num; + unsigned int if_num; u8 sum_orig, sum_neigh; u8 *neigh_addr; u8 tq_avg; @@ -1179,7 +1229,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int if_num; + unsigned int if_num; unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; unsigned int tq_iface_penalty; @@ -1220,7 +1270,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, orig_node->last_seen = jiffies; /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); if_num = if_incoming->if_num; 
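Splitting the scheduler into a locked wrapper plus a *_buff() worker keeps the lock discipline machine-checkable. A generic sketch (structure names shortened):

    /* worker: documents and, with lockdep, enforces that the caller
     * holds the mutex
     */
    static void ogm_schedule_buff(struct iface *hi)
    {
            lockdep_assert_held(&hi->ogm_buff_mutex);

            if (!hi->ogm_buff)      /* interface already disabled */
                    return;
            /* ... build and queue the OGM ... */
    }

    /* wrapper: the only entry point, takes the mutex itself */
    static void ogm_schedule(struct iface *hi)
    {
            mutex_lock(&hi->ogm_buff_mutex);
            ogm_schedule_buff(hi);
            mutex_unlock(&hi->ogm_buff_mutex);
    }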
orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); @@ -1230,7 +1280,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, } else { neigh_rq_count = 0; } - spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ if (orig_eq_count > neigh_rq_count) @@ -1698,9 +1748,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (is_my_orig) { unsigned long *word; - int offset; + size_t offset; s32 bit_pos; - s16 if_num; + unsigned int if_num; u8 *weight; orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, @@ -2477,7 +2527,7 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, return ret; } -static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); @@ -2817,8 +2867,8 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { - .activate = batadv_iv_iface_activate, .enable = batadv_iv_ogm_iface_enable, + .enabled = batadv_iv_iface_enabled, .disable = batadv_iv_ogm_iface_disable, .update_mac = batadv_iv_ogm_iface_update_mac, .primary_set = batadv_iv_ogm_primary_iface_set, diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 371a1f1651b48d58beef141f002eec7bf3cfe556..eb8cec14b854b4ca1ca23c91c3637c99cb356724 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -19,7 +19,6 @@ #include "main.h" #include -#include #include #include #include @@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; @@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; @@ -815,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, } orig_gw = batadv_gw_node_get(bat_priv, orig_node); - if (!orig_node) + if (!orig_gw) goto out; if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e07f636160b67533c63d3c779797347888143605..cec31769bb3fc8e1d2814e71b0540f78e92ef902 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -127,14 +129,12 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send - periodic worker broadcasting the own OGM - * @work: work queue item + * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the soft interface information */ -static void batadv_v_ogm_send(struct work_struct *work) 
+static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; - struct batadv_priv_bat_v *bat_v; - struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; unsigned char *ogm_buff; @@ -142,8 +142,7 @@ static void batadv_v_ogm_send(struct work_struct *work) u16 tvlv_len = 0; int ret; - bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); - bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -234,6 +233,23 @@ static void batadv_v_ogm_send(struct work_struct *work) return; } +/** + * batadv_v_ogm_send() - periodic worker broadcasting the own OGM + * @work: work queue item + */ +static void batadv_v_ogm_send(struct work_struct *work) +{ + struct batadv_priv_bat_v *bat_v; + struct batadv_priv *bat_priv; + + bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); + bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + batadv_v_ogm_send_softif(bat_priv); + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); +} + /** * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V * @hard_iface: the interface to prepare @@ -260,11 +276,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); struct batadv_ogm2_packet *ogm_packet; + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); if (!bat_priv->bat_v.ogm_buff) - return; + goto unlock; ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff; ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** @@ -886,6 +906,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno); INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send); + mutex_init(&bat_priv->bat_v.ogm_buff_mutex); + return 0; } @@ -897,7 +919,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq); + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + kfree(bat_priv->bat_v.ogm_buff); bat_priv->bat_v.ogm_buff = NULL; bat_priv->bat_v.ogm_buff_len = 0; + + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index e32ad47c6efdf17914aad1e89029020d15801150..4957d4824437ae0240a732f943d39caf2868923f 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -18,6 +18,7 @@ #include "debugfs.h" #include "main.h" +#include #include #include #include @@ -338,7 +339,26 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) } /** - * batadv_debugfs_del_hardif - delete the base directory for a hard interface + * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif + * @hard_iface: hard interface which was renamed + */ +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ + const char *name = hard_iface->net_dev->name; + struct dentry *dir; + struct dentry *d; + + dir = hard_iface->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_hardif() - delete the base directory for a hard interface * in debugfs. 
* @hard_iface: hard interface which is deleted. */ @@ -401,6 +421,30 @@ int batadv_debugfs_add_meshif(struct net_device *dev) return -ENOMEM; } +/** + * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif + * @dev: net_device which was renamed + */ +void batadv_debugfs_rename_meshif(struct net_device *dev) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + const char *name = dev->name; + struct dentry *dir; + struct dentry *d; + + dir = bat_priv->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries + * @dev: netdev struct of the soft interface + */ void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 9c5d4a65b98c35239709d9258bb889a61ffef8c2..901bbc357bf42e5c471e50ee5cd4cf42df7e26dc 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -29,8 +29,10 @@ struct net_device; void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); +void batadv_debugfs_rename_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface); void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); #else @@ -48,6 +50,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev) return 0; } +static inline void batadv_debugfs_rename_meshif(struct net_device *dev) +{ +} + static inline void batadv_debugfs_del_meshif(struct net_device *dev) { } @@ -58,6 +64,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) return 0; } +static inline +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ +} + static inline void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c6d37d22bd125f779deeab58ea5035b63ae3f977..788d62073964b2f50979d894427855b21533465f 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -500,6 +500,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ if (skb->priority >= 256 && skb->priority <= 263) frag_header.priority = skb->priority - 256; + else + frag_header.priority = 0; ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); ether_addr_copy(frag_header.dest, orig_node->orig); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 2e1a084b0bd2a10fb567c1adada8ca6ebd978bc7..9fdfa9984f024b33231b934db8ff611a6874fd09 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -738,6 +739,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + if (bat_priv->num_ifaces >= UINT_MAX) { + ret = -ENOSPC; + goto err_dev; + } + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface, NULL, NULL); if (ret) @@ -790,6 +796,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(soft_iface); + if 
(bat_priv->algo_ops->iface.enabled) + bat_priv->algo_ops->iface.enabled(hard_iface); + out: return 0; @@ -845,7 +854,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces) { + if (bat_priv->num_ifaces == 0) { batadv_gw_check_client_stop(bat_priv); if (autodel == BATADV_IF_CLEANUP_AUTO) @@ -881,7 +890,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (ret) goto free_if; - hard_iface->if_num = -1; + hard_iface->if_num = 0; hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; @@ -893,6 +902,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); + mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); @@ -950,6 +960,32 @@ void batadv_hardif_remove_interfaces(void) rtnl_unlock(); } +/** + * batadv_hard_if_event_softif() - Handle events for soft interfaces + * @event: NETDEV_* event to handle + * @net_dev: net_device which generated an event + * + * Return: NOTIFY_* result + */ +static int batadv_hard_if_event_softif(unsigned long event, + struct net_device *net_dev) +{ + struct batadv_priv *bat_priv; + + switch (event) { + case NETDEV_REGISTER: + batadv_sysfs_add_meshif(net_dev); + bat_priv = netdev_priv(net_dev); + batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); + break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_meshif(net_dev); + break; + } + + return NOTIFY_DONE; +} + static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -958,12 +994,8 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; - if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { - batadv_sysfs_add_meshif(net_dev); - bat_priv = netdev_priv(net_dev); - batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); - return NOTIFY_DONE; - } + if (batadv_softif_is_valid(net_dev)) + return batadv_hard_if_event_softif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || @@ -1012,6 +1044,9 @@ static int batadv_hard_if_event(struct notifier_block *this, if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_hardif(hard_iface); + break; default: break; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8e2a4b205257929e9b64f157e8570972cf1383f9..653eaadcfefb4db23c73034c157e7c86fc1bcc06 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1500,7 +1500,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) } int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_algo_ops *bao = bat_priv->algo_ops; @@ -1535,7 +1535,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, } int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; diff --git a/net/batman-adv/originator.h 
b/net/batman-adv/originator.h index d94220a6d21acf58d55fcf7a179151c67392b055..d6ca52220ec066d8e82096b7487d6f5e9dfd57dc 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -78,9 +78,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index cd82cff716c78eb9df9f85be139e21693bab7464..f59aac06733e1a7c45f9bb08b48fbf9c42eb740a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -950,14 +950,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - struct ethhdr *ethhdr; int ret = NET_RX_DROP; bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - ethhdr = eth_hdr(skb); - is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) @@ -977,12 +973,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) goto free_skb; + unicast_packet = (struct batadv_unicast_packet *)skb->data; + /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { /* If this is a unicast packet from another backgone gw, * drop it. 
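The rule behind this routing.c fix: a pointer derived from skb->data is invalidated by any call that may reallocate the skb head, so it has to be re-derived afterwards. Sketch:

    struct batadv_unicast_packet *pkt;

    if (!pskb_may_pull(skb, hdr_size))
            return NET_RX_DROP;

    /* skb->data may have moved: cast again instead of reusing a
     * pointer taken before the pull
     */
    pkt = (struct batadv_unicast_packet *)skb->data;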
*/ - orig_addr_gw = ethhdr->h_source; + orig_addr_gw = eth_hdr(skb)->h_source; orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); if (orig_node_gw) { is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, @@ -997,6 +995,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, } if (is4addr) { + unicast_4addr_packet = + (struct batadv_unicast_4addr_packet *)skb->data; subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2c2670b85fa941f768b0c50c892add075b72e457..dbc516824175bb5ea7f3d5abf0ef07f6a5bdc9a2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -872,7 +872,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; - rcu_read_lock(); + spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -910,7 +910,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&orig_node->vlan_list_lock); return tvlv_len; } @@ -941,15 +941,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; u16 num_vlan = 0; - u16 num_entries = 0; + u16 vlan_entries = 0; + u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; int change_offset; - rcu_read_lock(); + spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); + total_entries += vlan_entries; } change_offset = sizeof(**tt_data); @@ -957,7 +962,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); tvlv_len = *tt_len; tvlv_len += change_offset; @@ -974,6 +979,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); @@ -984,7 +993,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return tvlv_len; } @@ -1544,6 +1553,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, * by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list + * @flags: a pointer to store TT flags for the given @entry received + * from @orig_node * * find out if an orig_node is already in the list of a tt_global_entry. 
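The switch from rcu_read_lock() to the VLAN list spinlock in the hunks above matters because each of these functions sizes a buffer in one pass and fills it in a second, and both passes must observe the same list. A sketch with hypothetical helpers (GFP_ATOMIC because the allocation now happens under the lock):

    spin_lock_bh(&vlan_list_lock);

    hlist_for_each_entry(vlan, &vlan_list, list)
            num_entries += atomic_read(&vlan->tt.num_entries);

    buff = kzalloc(buff_len(num_entries), GFP_ATOMIC);
    if (buff)
            hlist_for_each_entry(vlan, &vlan_list, list)
                    fill_vlan_data(buff, vlan);     /* hypothetical */

    spin_unlock_bh(&vlan_list_lock);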
* @@ -1551,7 +1562,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) + const struct batadv_orig_node *orig_node, + u8 *flags) { struct batadv_tt_orig_list_entry *orig_entry; bool found = false; @@ -1559,6 +1571,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); if (orig_entry) { found = true; + + if (flags) + *flags = orig_entry->flags; + batadv_tt_orig_list_entry_put(orig_entry); } @@ -1741,7 +1757,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (!(common->flags & BATADV_TT_CLIENT_TEMP)) goto out; if (batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) + orig_node, NULL)) goto out_remove; batadv_tt_global_del_orig_list(tt_global_entry); goto add_orig_entry; @@ -2884,23 +2900,46 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_valid - verify that given tt entry is a valid one + * batadv_tt_local_valid() - verify local tt entry and get flags * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype + * @flags: a pointer to store TT flags for this client + * + * Checks the validity of the given local TT entry. If it is valid, the + * provided flags pointer is updated. * * Return: true if the entry is valid, false otherwise. */ -static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) +static bool batadv_tt_local_valid(const void *entry_ptr, + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) return false; + + if (flags) + *flags = tt_common_entry->flags; + return true; } +/** + * batadv_tt_global_valid() - verify global tt entry and get flags + * @entry_ptr: to be checked global tt entry + * @data_ptr: an orig_node object (may be NULL) + * @flags: a pointer to store TT flags for this client + * + * Checks the validity of the given global TT entry. If it is valid, the + * provided flags pointer is updated either with the common (summed) TT flags + * if data_ptr is NULL or with the specific, per-originator TT flags otherwise. + * + * Return: true if the entry is valid, false otherwise. + */ static bool batadv_tt_global_valid(const void *entry_ptr, - const void *data_ptr) + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; @@ -2914,7 +2953,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr, struct batadv_tt_global_entry, common); - return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node); + return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, + flags); } /** @@ -2924,25 +2964,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr, * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data - * @valid_cb: function to filter tt change entries + * @valid_cb: function to filter tt change entries and to return TT flags * @cb_data: data passed to the filter function as argument + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided, this becomes a no-op.
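The reworked filter contract is compact: the callback both validates an entry and reports which flags the generator should put on the wire, so per-originator flags replace the summed common flags. Sketch:

    typedef bool (*tt_valid_cb)(const void *entry, const void *data,
                                u8 *flags);

    /* inside the generator loop */
    u8 flags;

    if (!valid_cb(entry, cb_data, &flags))
            continue;                       /* filtered out */
    tt_change->flags = flags;               /* per-originator value */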
*/ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, bool (*valid_cb)(const void *, - const void *), + const void *, + u8 *flags), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; u16 tt_tot, tt_num_entries = 0; + u8 flags; + bool ret; u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; + if (!valid_cb) + return; + rcu_read_lock(); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2952,11 +3001,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, if (tt_tot == tt_num_entries) break; - if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) + ret = valid_cb(tt_common_entry, cb_data, &flags); + if (!ret) continue; ether_addr_copy(tt_change->addr, tt_common_entry->addr); - tt_change->flags = tt_common_entry->flags; + tt_change->flags = flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, sizeof(tt_change->reserved)); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d5e3968619b8e36f52875623e062dcad1c0e43a7..540a9c5c2270842050e2883ff6869604e6d30c4b 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include /* for linux/wait.h */ @@ -81,11 +82,13 @@ enum batadv_dhcp_recipient { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ struct batadv_hard_iface_bat_iv { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; }; /** @@ -155,7 +158,7 @@ enum batadv_hard_iface_wifi_flags { */ struct batadv_hard_iface { struct list_head list; - s16 if_num; + unsigned int if_num; char if_status; u8 num_bcasts; u32 wifi_flags; @@ -989,12 +992,14 @@ struct batadv_softif_vlan { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len * @ogm_wq: workqueue used to schedule OGM transmissions */ struct batadv_priv_bat_v { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; struct delayed_work ogm_wq; }; @@ -1081,7 +1086,7 @@ struct batadv_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - char num_ifaces; + unsigned int num_ifaces; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -1424,6 +1429,7 @@ struct batadv_forw_packet { * @activate: start routing mechanisms when hard-interface is brought up * (optional) * @enable: init routing info when hard-interface is enabled + * @enabled: notification when hard-interface was enabled (optional) * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information * belonging to this hard-interface @@ -1432,6 +1438,7 @@ struct batadv_forw_packet { struct batadv_algo_iface_ops { void (*activate)(struct batadv_hard_iface *hard_iface); int (*enable)(struct batadv_hard_iface *hard_iface); + void (*enabled)(struct batadv_hard_iface *hard_iface); void (*disable)(struct batadv_hard_iface *hard_iface); void (*update_mac)(struct batadv_hard_iface *hard_iface); void 
(*primary_set)(struct batadv_hard_iface *hard_iface); @@ -1479,9 +1486,10 @@ struct batadv_algo_neigh_ops { */ struct batadv_algo_orig_ops { void (*free)(struct batadv_orig_node *orig_node); - int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); - int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, - int del_if_num); + int (*add_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num); + int (*del_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num, unsigned int del_if_num); #ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 5f3074cb6b4db67aad4491e99e291f073e1876c5..b6f26ec9e90cdaea9107ac11f8b6427c663b7cda 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -413,10 +413,8 @@ static int __rfcomm_create_dev(struct sock *sk, void __user *arg) dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel); if (IS_ERR(dlc)) return PTR_ERR(dlc); - else if (dlc) { - rfcomm_dlc_put(dlc); + if (dlc) return -EBUSY; - } dlc = rfcomm_dlc_alloc(GFP_KERNEL); if (!dlc) return -ENOMEM; diff --git a/net/core/dev.c b/net/core/dev.c index 9c533ee7fcf44b89a745f8bbe19802bf956397bd..e8d1af5f95007dedbaea62d0856078442492329a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3581,7 +3581,8 @@ EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; -unsigned int __read_mostly netdev_budget_usecs = 2000; +/* Must be at least 2 jiffies to guarantee 1 jiffy timeout */ +unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a6d97c1d8104eac629273337ca8999a86118533..9bb321df08698137897c2d8b5740a2e309d01522 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -799,7 +799,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 7bf83359861527564087e179760be87498cba600..668330ace961607c07ce98e9de0edf843c96b26e 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -57,30 +57,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid stalling socket creation for tasks with a large number of threads + * and open sockets, release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with the new classid.
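Worth spelling out the arithmetic behind the net/core/dev.c hunk above: the old fixed default of 2000 usecs is shorter than one jiffy whenever HZ < 500, so converting it to jiffies could round down to nothing, while 2 * USEC_PER_SEC / HZ is exactly two jiffies' worth of microseconds for any HZ the kernel is built with. A standalone check (plain C arithmetic, not kernel code):

    #include <stdio.h>

    #define USEC_PER_SEC 1000000UL

    int main(void)
    {
        unsigned long hz_values[] = { 100, 250, 300, 1000 };

        for (int i = 0; i < 4; i++) {
            unsigned long hz = hz_values[i];
            unsigned long budget = 2 * USEC_PER_SEC / hz;   /* new default */
            unsigned long jiffy_us = USEC_PER_SEC / hz;

            /* the old 2000us default fell below one jiffy for HZ < 500,
             * so the derived timeout could round down to zero jiffies */
            printf("HZ=%-4lu budget=%6luus jiffy=%5luus old_default>=jiffy: %s\n",
                   hz, budget, jiffy_us, 2000 >= jiffy_us ? "yes" : "no");
        }
        return 0;
    }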
+ */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -101,13 +131,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, cs->classid = (u32)value; css_task_iter_start(css, 0, &it); - while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); - cond_resched(); - } + while ((p = css_task_iter_next(&it))) + update_classid_task(p, cs->classid); css_task_iter_end(&it); return 0; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7c479c1ffd77ff1bc2827ef659f6f29e0183ee64..cb15338cfda4d25b0ed849fb7d88f3a6f9f2e354 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2424,7 +2424,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { - __dev_notify_flags(dev, old_flags, 0U); + __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags)); } else { dev->rtnl_link_state = RTNL_LINK_INITIALIZED; __dev_notify_flags(dev, old_flags, ~0U); diff --git a/net/core/sock.c b/net/core/sock.c index 4e934404aeaf0edf7df2c3d79cdc05540f5c70b9..bed67e70ec1e936614b2fcfec8e0cdd908700740 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1684,7 +1684,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 5f5d9eafccf5965b3deb23b2449a1392e07e6c61..ea133857f19eb16e70684b3e0d5d597638d8757e 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -242,7 +242,7 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) * - the key's semaphore is read-locked */ static long dns_resolver_read(const struct key *key, - char __user *buffer, size_t buflen) + char *buffer, size_t buflen) { int err = PTR_ERR(key->payload.data[dns_key_error]); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index de92fc1fc3be6d996e9e0ec0b16ba4a3cc89f959..b3b918afd2127c58edee244a3d83ef4cfa6951ea 100644 --- 
a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -134,6 +134,8 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port].netdev; + skb->offload_fwd_mark = 1; + return skb; } diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index af3a12a36d882645ba929d4a28ee066e0d12df03..f268c5c3eedb6d55e028185a80f02afcb15a0596 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -41,7 +41,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); u16 *phdr, hdr; - if (skb_cow_head(skb, 0) < 0) + if (skb_cow_head(skb, QCA_HDR_LEN) < 0) return NULL; skb_push(skb, QCA_HDR_LEN); diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 6705420b3111f528a930f6410473beb47793cee7..d7206581145d54be739b80f0066a438849a83752 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -468,13 +468,9 @@ int hsr_get_node_data(struct hsr_priv *hsr, struct hsr_port *port; unsigned long tdiff; - - rcu_read_lock(); node = find_node_by_AddrA(&hsr->node_db, addr); - if (!node) { - rcu_read_unlock(); - return -ENOENT; /* No such entry */ - } + if (!node) + return -ENOENT; ether_addr_copy(addr_b, node->MacAddressB); @@ -509,7 +505,5 @@ int hsr_get_node_data(struct hsr_priv *hsr, *addr_b_ifindex = -1; } - rcu_read_unlock(); - return 0; } diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b9cce0fd56966bbd8a16c8bc51555dfab467b6d0..606bc7fe5cc7c994de77d8ecd5a1ac5900954b9c 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -64,10 +64,16 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); - if (!data[IFLA_HSR_VERSION]) + if (!data[IFLA_HSR_VERSION]) { hsr_version = 0; - else + } else { hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]); + if (hsr_version > 1) { + NL_SET_ERR_MSG_MOD(extack, + "Only versions 0..1 are supported"); + return -EINVAL; + } + } return hsr_dev_finalize(dev, link, multicast_spec, hsr_version); } @@ -259,17 +265,16 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) if (!na) goto invalid; - hsr_dev = __dev_get_by_index(genl_info_net(info), - nla_get_u32(info->attrs[HSR_A_IFINDEX])); + rcu_read_lock(); + hsr_dev = dev_get_by_index_rcu(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); if (!hsr_dev) - goto invalid; + goto rcu_unlock; if (!is_hsr_master(hsr_dev)) - goto invalid; - + goto rcu_unlock; /* Send reply */ - - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -321,12 +326,10 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq); if (res < 0) goto nla_put_failure; - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); if (port) res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX, port->dev->ifindex); - rcu_read_unlock(); if (res < 0) goto nla_put_failure; @@ -336,20 +339,22 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq); if (res < 0) goto nla_put_failure; - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); if (port) res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX, port->dev->ifindex); - rcu_read_unlock(); if (res < 0) goto nla_put_failure; + rcu_read_unlock(); + genlmsg_end(skb_out, msg_head); 
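A pattern note on the hsr_netlink.c changes here: once the device lookup moves under rcu_read_lock() (dev_get_by_index_rcu()), everything up to the matching unlock runs in a context that must not sleep, which is why the genlmsg_new() allocations switch from GFP_KERNEL to GFP_ATOMIC. A userspace analogue of that discipline, with a thread-local flag standing in for "inside an RCU read section" (illustrative only, not the kernel mechanism):

    #include <assert.h>
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_rwlock_t tbl_lock = PTHREAD_RWLOCK_INITIALIZER;
    static __thread bool in_read_section;

    static void *alloc_atomic(size_t n)
    {
        return malloc(n);            /* stand-in for GFP_ATOMIC */
    }

    static void *alloc_sleeping(size_t n)
    {
        assert(!in_read_section);    /* stand-in for might_sleep() */
        return malloc(n);            /* stand-in for GFP_KERNEL */
    }

    int main(void)
    {
        void *cfg = alloc_sleeping(16);   /* fine: not in a read section */

        pthread_rwlock_rdlock(&tbl_lock);
        in_read_section = true;

        void *reply = alloc_atomic(64);   /* ok under the read lock */
        /* alloc_sleeping(64) here would trip the assertion, the way
         * GFP_KERNEL under rcu_read_lock() trips might_sleep() */

        in_read_section = false;
        pthread_rwlock_unlock(&tbl_lock);

        free(reply);
        free(cfg);
        printf("reply allocated inside read section with atomic flavour\n");
        return 0;
    }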
genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); return 0; +rcu_unlock: + rcu_read_unlock(); invalid: netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; @@ -359,6 +364,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) /* Fall through */ fail: + rcu_read_unlock(); return res; } @@ -366,16 +372,14 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) */ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) { - /* For receiving */ - struct nlattr *na; + unsigned char addr[ETH_ALEN]; struct net_device *hsr_dev; - - /* For sending */ struct sk_buff *skb_out; - void *msg_head; struct hsr_priv *hsr; - void *pos; - unsigned char addr[ETH_ALEN]; + bool restart = false; + struct nlattr *na; + void *pos = NULL; + void *msg_head; int res; if (!info) @@ -385,17 +389,17 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (!na) goto invalid; - hsr_dev = __dev_get_by_index(genl_info_net(info), - nla_get_u32(info->attrs[HSR_A_IFINDEX])); + rcu_read_lock(); + hsr_dev = dev_get_by_index_rcu(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); if (!hsr_dev) - goto invalid; + goto rcu_unlock; if (!is_hsr_master(hsr_dev)) - goto invalid; - + goto rcu_unlock; +restart: /* Send reply */ - - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb_out = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -409,18 +413,26 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) goto nla_put_failure; } - res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); - if (res < 0) - goto nla_put_failure; + if (!restart) { + res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); + if (res < 0) + goto nla_put_failure; + } hsr = netdev_priv(hsr_dev); - rcu_read_lock(); - pos = hsr_get_next_node(hsr, NULL, addr); + if (!pos) + pos = hsr_get_next_node(hsr, NULL, addr); while (pos) { res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); if (res < 0) { - rcu_read_unlock(); + if (res == -EMSGSIZE) { + genlmsg_end(skb_out, msg_head); + genlmsg_unicast(genl_info_net(info), skb_out, + info->snd_portid); + restart = true; + goto restart; + } goto nla_put_failure; } pos = hsr_get_next_node(hsr, pos, addr); @@ -432,15 +444,18 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) return 0; +rcu_unlock: + rcu_read_unlock(); invalid: netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; nla_put_failure: - kfree_skb(skb_out); + nlmsg_free(skb_out); /* Fall through */ fail: + rcu_read_unlock(); return res; } @@ -467,6 +482,7 @@ static struct genl_family hsr_genl_family __ro_after_init = { .name = "HSR", .version = 1, .maxattr = HSR_A_MAX, + .netnsok = true, .module = THIS_MODULE, .ops = hsr_ops, .n_ops = ARRAY_SIZE(hsr_ops), diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 5fee6ec7c93d44cbc35bbb74989f73bbc7f7e774..b215df0bce0e01eaaae6746fdedf40e6d648f339 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -152,16 +152,16 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, if (port == NULL) return -ENOMEM; + port->hsr = hsr; + port->dev = dev; + port->type = type; + if (type != HSR_PT_MASTER) { res = hsr_portdev_setup(dev, port); if (res) goto fail_dev_setup; } - port->hsr = hsr; - port->dev = dev; - port->type = type; - list_add_tail_rcu(&port->port_list, &hsr->ports); synchronize_rcu(); diff --git a/net/ieee802154/nl_policy.c 
b/net/ieee802154/nl_policy.c index 35c4326684548d88f408f84ded46ce6ac2d04ebc..040983fc15da37583fb5822df3cad04e3884948d 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -30,7 +30,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, + [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f48fe6fc7e8c413d7d7e4d7d37d1d859a566e8fb..4abc4ba733bf0312cc95b4dfbcd6756d6f06dcab 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -297,6 +297,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" + depends on IPV6 || IPV6=n select INET_TUNNEL select NET_IP_TUNNEL depends on INET_XFRM_MODE_TUNNEL diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index f0165c5f376b398950db7696e813c94b19034068..1c21dc5d6dd4049c47f947ac727516500d534ff2 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1738,6 +1738,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1749,7 +1750,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; if (gateway) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9a849d7eb55d0f1599f499e01171bbc510a9d9af..7d9165beca8ac32c289ff84c73ba46583dab2fbe 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -579,12 +579,15 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) +static int ip_mc_autojoin_config(struct net *net, bool join, + const struct in_ifaddr *ifa) { +#if defined(CONFIG_IP_MULTICAST) struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ifa->ifa_address, .imr_ifindex = ifa->ifa_dev->dev->ifindex, }; + struct sock *sk = net->ipv4.mc_autojoin_sk; int ret; ASSERT_RTNL(); @@ -597,6 +600,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) release_sock(sk); return ret; +#else + return -EOPNOTSUPP; +#endif } static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -638,7 +644,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, continue; if (ipv4_is_multicast(ifa->ifa_address)) - ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); + ip_mc_autojoin_config(net, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -896,8 +902,7 @@ static int inet_rtm_newaddr(struct 
sk_buff *skb, struct nlmsghdr *nlh, */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { - int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, - true, ifa); + int ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { inet_free_ifa(ifa); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index bb847d2807786a9e23c406fddd8c2cbf23636d75..3f9509679f0e1f439d6968ede8c633b39a8ec389 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2319,6 +2319,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) " %zd bytes, size of tnode: %zd bytes.\n", LEAF_SIZE, TNODE_SIZE(0)); + rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; @@ -2338,7 +2339,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) trie_show_usage(seq, t->stats); #endif } + cond_resched_rcu(); } + rcu_read_unlock(); return 0; } diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7efe740c06ebff66d5e50f1cc067bc5973fc58e3..4a5e55e94a9e56546729c41fcdeae10d69186dcb 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -60,7 +60,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -/* Fills in tpi and returns header length to be pulled. */ +/* Fills in tpi and returns header length to be pulled. + * Note that caller must use pskb_may_pull() before pulling GRE header. + */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs) { @@ -114,8 +116,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + u8 _val, *val; + + val = skb_header_pointer(skb, nhs + hdr_len, + sizeof(_val), &_val); + if (!val) + return -EINVAL; tpi->proto = proto; - if ((*(u8 *)options & 0xF0) != 0x40) + if ((*val & 0xF0) != 0x40) hdr_len += 4; } tpi->hdr_len = hdr_len; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 45b00b76c430dffaa27c248606bfbf8902c43d4a..a403001c50af20e95b5ef88afd585525d777b9d5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -482,8 +482,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + out: release_sock(sk); + if (newsk && mem_cgroup_sockets_enabled) { + int amt; + + /* atomically get the memory usage, set and charge the + * newsk->sk_memcg. + */ + lock_sock(newsk); + + /* The socket has not been accepted yet, no need to look at + * newsk->sk_wmem_queued. 
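On the inet_connection_sock.c hunk here: since sk_clone_lock() no longer sets up sk_memcg, the charge is reconstructed at accept() time from memory the socket already holds. Assuming sk_mem_pages() rounds bytes up to whole pages (4 KiB assumed below), the amount works out as in this standalone sketch:

    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* rough shape of sk_mem_pages(): round bytes up to pages */
    static int sk_mem_pages(int amt)
    {
        return (amt + PAGE_SIZE - 1) / PAGE_SIZE;
    }

    int main(void)
    {
        int forward_alloc = 3000;   /* bytes prereserved for the socket */
        int rmem_alloc = 6200;      /* bytes already queued for reading */
        int amt = sk_mem_pages(forward_alloc + rmem_alloc);

        /* the cloned sock starts with sk_memcg == NULL and is charged
         * these pages once mem_cgroup_sk_alloc() runs at accept() */
        printf("charge %d page(s) for %d bytes\n",
               amt, forward_alloc + rmem_alloc);
        return 0;
    }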
+ */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&newsk->sk_rmem_alloc)); + mem_cgroup_sk_alloc(newsk); + if (newsk->sk_memcg && amt) + mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + + release_sock(newsk); + } if (req) reqsk_put(req); return newsk; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index eb158badebc43ece6b804d1aaa7a48ec2dad1c80..7ba013d6c00aff5b63c235006b93957801fd6532 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,13 +105,9 @@ static size_t inet_sk_attr_size(struct sock *sk, aux = handler->idiag_get_aux_size(sk, net_admin); return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) @@ -152,6 +148,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) goto errout; + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -289,24 +303,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || - ext & (1 << (INET_DIAG_TCLASS - 1))) { - u32 classid = 0; - -#ifdef CONFIG_SOCK_CGROUP_DATA - classid = sock_cgroup_classid(&sk->sk_cgrp_data); -#endif - /* Fallback to socket priority if class id isn't set. - * Classful qdiscs use it as direct reference to class. - * For cgroup2 classid is always zero. 
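The INET_DIAG_CLASS_ID block being moved into inet_diag_msg_attrs_fill() above implements one small fallback rule. A compilable restatement (simplified; the kernel reads the classid from sk->sk_cgrp_data):

    #include <stdint.h>
    #include <stdio.h>

    /* Report the net_cls classid; if it was never set (always the
     * case on cgroup2), fall back to the socket priority, which
     * classful qdiscs use as a direct class reference. */
    static uint32_t diag_classid(uint32_t cgroup_classid, uint32_t sk_priority)
    {
        return cgroup_classid ? cgroup_classid : sk_priority;
    }

    int main(void)
    {
        printf("net_cls set: %#x\n", diag_classid(0x10001, 6));
        printf("cgroup2 (0): %#x\n", diag_classid(0, 6));
        return 0;
    }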
- */ - if (!classid) - classid = sk->sk_priority; - - if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) - goto errout; - } - out: nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 404dc765f2bf82186ea81783d4829f15856619e0..f6793017a20d95f6af3c889256008459d50342b7 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -155,11 +155,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, cand = t; } - if (flags & TUNNEL_NO_KEY) - goto skip_key_lookup; - hlist_for_each_entry_rcu(t, head, hash_node) { - if (t->parms.i_key != key || + if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) || t->parms.iph.saddr != 0 || t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) @@ -171,7 +168,6 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, cand = t; } -skip_key_lookup: if (cand) return cand; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 59384ffe89f730acf5cd32dfc1202af79be70d19..c1693d75e19687f47c9c5cef79fb44fb6b5fb94c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -208,17 +208,39 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - struct rtable *rt; - - fl->u.ip4.flowi4_oif = dev->ifindex; - fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; - rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); - if (IS_ERR(rt)) { + switch (skb->protocol) { + case htons(ETH_P_IP): { + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + skb_dst_set(skb, dst); + break; +#endif + default: dev->stats.tx_carrier_errors++; goto tx_error_icmp; } - dst = &rt->dst; - skb_dst_set(skb, dst); } dst_hold(dst); @@ -659,10 +681,8 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); - if (err < 0) { - pr_info("%s: cant't register tunnel\n",__func__); + if (err < 0) goto xfrm_tunnel_failed; - } msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 53a11894f9e4e93de80772bca442163d2200b47e..261a9813b88cd1171d0017b0b9bd96637df289a7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -520,9 +520,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; /* hdrincl should be READ_ONCE(inet->hdrincl) - * but READ_ONCE() doesn't work with bit fields + * but READ_ONCE() doesn't work with bit fields. + * Doing this indirectly yields the same result. */ hdrincl = inet->hdrincl; + hdrincl = READ_ONCE(hdrincl); /* * Check the flags. 
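On the raw.c change just above: READ_ONCE() cannot be applied to a bitfield, because a bitfield's address cannot be taken, so the value is copied into a plain int first and the annotated read is done on that. A userspace sketch with a volatile cast standing in for the kernel's READ_ONCE() (illustrative; relies on the GCC typeof extension):

    #include <stdio.h>

    #define READ_ONCE(x) (*(volatile typeof(x) *)&(x))

    struct inet_flags {
        unsigned hdrincl:1;   /* may be flipped by setsockopt() */
        unsigned mc_loop:1;
    };

    int main(void)
    {
        struct inet_flags inet = { .hdrincl = 1 };
        int hdrincl;

        /* READ_ONCE(inet.hdrincl) would not compile: a bitfield has
         * no address.  Copy to a plain variable and mark that read. */
        hdrincl = inet.hdrincl;
        hdrincl = READ_ONCE(hdrincl);

        printf("hdrincl=%d\n", hdrincl);
        return 0;
    }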
*/ diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 6367ecdf76c42ff85ff102a4eb4e5a1e81af1953..1d84b02ec76589897b83eb8b108ea2080df90565 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -99,8 +99,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb, if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8b855d3eec9e7562658bce1bfb5a3e34af62ceb6..3c298ec3220027efbfeadd2fc63301ea75df634d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -133,8 +133,6 @@ static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; - /* * Interface to generic destination cache. */ @@ -1014,21 +1012,22 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; if (ip_mtu_locked(dst)) return; - if (ipv4_mtu(dst) < mtu) + if (old_mtu < mtu) return; if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = ip_rt_min_pmtu; + mtu = min(old_mtu, ip_rt_min_pmtu); } - if (rt->rt_pmtu == mtu && + if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; @@ -2868,6 +2867,7 @@ void ip_rt_multicast_event(struct in_device *in_dev) static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; +static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void __user *buffer, diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d9ad986c7b2c9e073616c63d6d5ab376d2b72d5f..cc3f6da306c6110d9dbf1eac0e5c9874fa58929e 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -67,8 +67,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 94b8702603bc54f36542977ed0d9c3b93d43b6b7..35dbc8eb93964ce56afc320135be87ce336a26c3 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -76,9 +76,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); -#ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; -#endif return xfrm_output(sk, skb); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0203283272dc1df7e5e7e613652da128dbb53e13..bd7b97ec193ca651b14fd1f3fde04d9a0c08bebc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3217,6 +3217,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) if (netif_is_l3_master(idev->dev)) return; + /* no link local addresses on devices flagged as slaves */ + if (idev->dev->flags & IFF_SLAVE) + return; + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); switch (idev->cnf.addr_gen_mode) { 
@@ -3266,6 +3270,10 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_RAWIP)) { /* Alas, we support only Ethernet autoconfiguration. */ + idev = __in6_dev_get(dev); + if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && + dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); return; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e5308d7cbd75c4fb67861082382122d445cf5b74..d43abeb1e4150ffd760fbe6766fdbe11b9b9d266 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -893,8 +893,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -934,7 +933,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = *ins; found++; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 396a0f61f5f88a4aa2ee3b30c1e69ab6b9642e4e..207bf342e99560e7c9dab8ccc085defea3862c39 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -315,7 +315,7 @@ static int vti6_rcv(struct sk_buff *skb) if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { rcu_read_unlock(); - return 0; + goto discard; } ipv6h = ipv6_hdr(skb); @@ -454,15 +454,33 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) int mtu; if (!dst) { - fl->u.ip6.flowi6_oif = dev->ifindex; - fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; - dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); - if (dst->error) { - dst_release(dst); - dst = NULL; + switch (skb->protocol) { + case htons(ETH_P_IP): { + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) + goto tx_err_link_failure; + dst = &rt->dst; + skb_dst_set(skb, dst); + break; + } + case htons(ETH_P_IPV6): + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + goto tx_err_link_failure; + } + skb_dst_set(skb, dst); + break; + default: goto tx_err_link_failure; } - skb_dst_set(skb, dst); } dst_hold(dst); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5c91b05c8d8feb3207741a4d9b821df7e5a24cc6..337b43d4c3eb81f71e10d3a24f4314c48a3664f1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -185,9 +185,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol != IPPROTO_TCP) + } + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; + break; + } + if (sk->sk_protocol != IPPROTO_TCP) break; - if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f98c37a7d5bf7d35ca496aacbfd479cd9af610a1..4eee004a278887f7fecde73ad5c183a93d3f48c8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3255,6 +3255,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 
349e83d182300ce86a7ca965186dfa2296765847..e2cbb6286adf92177467ff7f1bca7a6662a1c3fa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -718,7 +718,6 @@ static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) { - bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk_listener); @@ -726,7 +725,7 @@ static void tcp_v6_init_req(struct request_sock *req, ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ - if ((!sk_listener->sk_bound_dev_if || l3_slave) && + if (!sk_listener->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 29dae7f2ff14d7bd5c5932b752e72da02b5537ac..aff901be53539a750b54eafc8be34cecf81bc558 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -130,9 +130,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif return xfrm_output(sk, skb); } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 994dde6e5f9d91df43624915d85d143b8b623de1..986e9b6b961d206caa6dc68dc1d5cbb0c3dcd93e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1137,7 +1137,8 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, } } - if (!(mpath->flags & MESH_PATH_RESOLVING)) + if (!(mpath->flags & MESH_PATH_RESOLVING) && + mesh_path_sel_is_hwmp(sdata)) mesh_queue_preq(mpath, PREQ_Q_F_START); if (skb_queue_len(&mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 36bd59ff49c4395281d231459347aa6e62d98f1e..ab26b8b954719652b4af392ae81bb66a533c048c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2273,7 +2273,7 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, if (!ieee80211_is_data(hdr->frame_control)) return; - if (ieee80211_is_nullfunc(hdr->frame_control) && + if (ieee80211_is_any_nullfunc(hdr->frame_control) && sdata->u.mgd.probe_send_count > 0) { if (ack) ieee80211_sta_reset_conn_monitor(sdata); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 45226c6aa45b1d30b5a86e7f9f95f2f1afd5cd90..f44dd79081222a2f807de0213c6e328cd864daae 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1255,8 +1255,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || - ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control) || + ieee80211_is_any_nullfunc(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; @@ -1643,8 +1642,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. */ - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { + if (ieee80211_is_any_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); /* @@ -2134,7 +2132,7 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) /* Drop unencrypted frames if key is set. 
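Most of the mac80211 churn in these hunks is mechanical: open-coded pairs of ieee80211_is_nullfunc() || ieee80211_is_qos_nullfunc() collapse into one ieee80211_is_any_nullfunc() helper. A host-order sketch of what the predicate tests (the kernel works on little-endian __le16 frame control; constants follow the IEEE 802.11 type/subtype encoding):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FCTL_FTYPE      0x000c
    #define FCTL_STYPE      0x00f0
    #define FTYPE_DATA      0x0008
    #define STYPE_NULLFUNC  0x0040
    #define STYPE_QOS_NULL  0x00c0

    static bool is_nullfunc(uint16_t fc)
    {
        return (fc & (FCTL_FTYPE | FCTL_STYPE)) ==
               (FTYPE_DATA | STYPE_NULLFUNC);
    }

    static bool is_qos_nullfunc(uint16_t fc)
    {
        return (fc & (FCTL_FTYPE | FCTL_STYPE)) ==
               (FTYPE_DATA | STYPE_QOS_NULL);
    }

    /* the consolidated helper: either flavour of nullfunc data frame */
    static bool is_any_nullfunc(uint16_t fc)
    {
        return is_nullfunc(fc) || is_qos_nullfunc(fc);
    }

    int main(void)
    {
        printf("nullfunc:     %d\n", is_any_nullfunc(FTYPE_DATA | STYPE_NULLFUNC));
        printf("qos nullfunc: %d\n", is_any_nullfunc(FTYPE_DATA | STYPE_QOS_NULL));
        printf("plain data:   %d\n", is_any_nullfunc(FTYPE_DATA));
        return 0;
    }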
*/ if (unlikely(!ieee80211_has_protected(fc) && - !ieee80211_is_nullfunc(fc) && + !ieee80211_is_any_nullfunc(fc) && ieee80211_is_data(fc) && rx->key)) return -EACCES; @@ -3862,7 +3860,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&local->sta_mtx); - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) continue; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 627dc642f894ee188c0bd14e6cd9746fa9aaa714..77ab9cc1a230a133df254d14145462b7f024c05f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -3,6 +3,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018-2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -951,6 +952,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta) might_sleep(); lockdep_assert_held(&local->sta_mtx); + while (sta->sta_state == IEEE80211_STA_AUTHORIZED) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + WARN_ON_ONCE(ret); + } + /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index fbe7354aeac7b4750dd7047ec3330d757e5c8d9e..fcfa6714e4924b960234d08eb537200abd2902b9 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -478,8 +478,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, rcu_read_lock(); sdata = ieee80211_sdata_from_skb(local, skb); if (sdata) { - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) + if (ieee80211_is_any_nullfunc(hdr->frame_control)) cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); @@ -856,7 +855,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, I802_DEBUG_INC(local->dot11FailedCount); } - if ((ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && + if (ieee80211_is_any_nullfunc(fc) && ieee80211_has_pm(fc) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 09c7aa519ca82fc17e2261fbf544a9e8f7fb595f..1b1f2d6cb3f4b6fab89676c1b6fa2ec73397e637 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -296,7 +296,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) && test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) && !ieee80211_is_probe_req(hdr->frame_control) && - !ieee80211_is_nullfunc(hdr->frame_control)) + !ieee80211_is_any_nullfunc(hdr->frame_control)) /* * When software scanning only nullfunc frames (to notify * the sleep state to the AP) and probe requests (for the @@ -3451,8 +3451,26 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, tx.skb = skb; tx.sdata = vif_to_sdata(info->control.vif); - if (txq->sta) + if (txq->sta) { tx.sta = container_of(txq->sta, struct sta_info, sta); + /* + * Drop unicast frames to unauthorised stations unless they are + * EAPOL frames from the local station. 
+ */ + if (unlikely(ieee80211_is_data(hdr->frame_control) && + !ieee80211_vif_is_mesh(&tx.sdata->vif) && + tx.sdata->vif.type != NL80211_IFTYPE_OCB && + !is_multicast_ether_addr(hdr->addr1) && + !test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) && + (!(info->control.flags & + IEEE80211_TX_CTRL_PORT_CTRL_PROTO) || + !ether_addr_equal(tx.sdata->vif.addr, + hdr->addr2)))) { + I802_DEBUG_INC(local->tx_handlers_drop_unauth_port); + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + } /* * The key can be removed while the packet was queued, so need to call diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 81f120466c38b9971a1cf7b02ae4596dfa4ac40e..cd3cdd1a0b57695607ad206cf715d4c004aabd42 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -944,16 +944,22 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem_parse_failed = true; break; case WLAN_EID_VHT_OPERATION: - if (elen >= sizeof(struct ieee80211_vht_operation)) + if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_OPMODE_NOTIF: - if (elen > 0) + if (elen > 0) { elems->opmode_notif = pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3a7d909a7f544ade465349ae2be19208aa12b1ed..f951c83559aded7fc54bd4368b5ca838cf081388 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -579,6 +579,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +static inline bool +nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) +{ + return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, + &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && + net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); +} + /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { @@ -772,19 +784,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, /* This is the conntrack entry already in hashes that won race. 
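The conntrack clash-resolution rework here hinges on the new nf_ct_match(): a losing entry may still be merged into the winner after NAT setup, but only when both directions of the tuple are identical (plus zone and netns, checked in the kernel version). A simplified, compilable restatement of that comparison (zone and netns checks elided):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct tuple { uint32_t src, dst; uint16_t sport, dport; };

    struct conn {
        struct tuple orig;
        struct tuple reply;
    };

    /* mergeable only if both directions describe the same flow */
    static bool ct_match(const struct conn *a, const struct conn *b)
    {
        return !memcmp(&a->orig,  &b->orig,  sizeof(a->orig)) &&
               !memcmp(&a->reply, &b->reply, sizeof(a->reply));
    }

    int main(void)
    {
        struct conn winner = { { 1, 2, 1000, 53 }, { 2, 1, 53, 1000 } };
        struct conn loser  = winner;        /* same flow, lost the race */
        struct conn natted = winner;

        natted.reply.dst = 9;               /* NAT rewrote the reply */

        printf("identical clash mergeable: %d\n", ct_match(&winner, &loser));
        printf("rewritten clash mergeable: %d\n", ct_match(&winner, &natted));
        return 0;
    }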
*/ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); const struct nf_conntrack_l4proto *l4proto; + enum ip_conntrack_info oldinfo; + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && - ((ct->status & IPS_NAT_DONE_MASK) == 0) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { - enum ip_conntrack_info oldinfo; - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); - - nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_conntrack_put(&loser_ct->ct_general); - nf_ct_set(skb, ct, oldinfo); - return NF_ACCEPT; + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || + nf_ct_match(ct, loser_ct)) { + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, oldinfo); + return NF_ACCEPT; + } + nf_ct_put(ct); } NF_CT_STAT_INC(net, drop); return NF_DROP; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 91490446ebb42ea9d70d91dd0a5ec1c9b8e02d81..5b8d5bfeb7ac5338aec36193d0a76756dc8167b0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3129,7 +3129,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, NFT_SET_INTERVAL | NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) - return -EINVAL; + return -EOPNOTSUPP; /* Only one of these operations is supported */ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == (NFT_SET_MAP | NFT_SET_OBJECT)) @@ -3167,7 +3167,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); if (objtype == NFT_OBJECT_UNSPEC || objtype > NFT_OBJECT_MAX) - return -EINVAL; + return -EOPNOTSUPP; } else if (flags & NFT_SET_OBJECT) return -EINVAL; else diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d33ce6d5ebce92db2fab30cb4286c11ffd8c321a..dd1030f5dd5e290e88313895f882788272317e22 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -733,6 +733,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, + [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, + [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index ce13a50b91893fd67a9c8586f7cca53278b1067d..ee190fa4dc34700db740fc81a070ea1ea309c40c 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -62,6 +62,13 @@ static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } +static int nft_fwd_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS)); +} + static struct nft_expr_type nft_fwd_netdev_type; static const struct nft_expr_ops nft_fwd_netdev_ops = { .type = &nft_fwd_netdev_type, @@ -69,6 +76,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .eval = nft_fwd_netdev_eval, .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, + .validate = nft_fwd_validate, }; static struct nft_expr_type nft_fwd_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index e110b0ebbf58b02b5b4ae8e9b8f9ad804ac861ba..19446a89a2a8158317c011fb662e406c4adafa91 100644 --- 
a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -121,6 +121,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index fe8e8a1622b59f3986a1cc65ba569a521b09f762..186f97f1c6c01fa57b0174c9d62d177d756b2fb2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -845,6 +845,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); } +#define HASHLIMIT_MAX_SIZE 1048576 + static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, struct xt_hashlimit_htable **hinfo, struct hashlimit_cfg3 *cfg, @@ -855,6 +857,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; + if (cfg->size > HASHLIMIT_MAX_SIZE) { + cfg->size = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size); + } + if (cfg->max > HASHLIMIT_MAX_SIZE) { + cfg->max = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max); + } if (par->family == NFPROTO_IPV4) { if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c67abda5d6391b3a208ab949913a8d2b9df9fc30..3e4e07559272286b0c74b21a9524728dd6ff8fe2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -997,7 +997,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->netlink_bind && groups) { int group; - for (group = 0; group < nlk->ngroups; group++) { + /* nl_groups is a u32, so cap the maximum groups we can bind */ + for (group = 0; group < BITS_PER_TYPE(u32); group++) { if (!test_bit(group, &groups)) continue; err = nlk->netlink_bind(net, group + 1); @@ -1016,7 +1017,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_undo_bind(nlk->ngroups, groups, sk); + netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); goto unlock; } } @@ -2388,7 +2389,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, in_skb->len)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, (u8 *)extack->bad_attr - - in_skb->data)); + (u8 *)nlh)); } else { if (extack->cookie_len) WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 0c59354e280ed1fa5fc5e78c6ac0dd53d4fb5d70..d098bb8d53aa404ba5fbe9cae03d43d43a469451 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -199,6 +199,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, /* refcount initialized at 1 */ spin_unlock_bh(&nr_node_list_lock); + nr_neigh_put(nr_neigh); return 0; } nr_node_lock(nr_node); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 6bf14f4f4b428e8283d19460331a7b9f26334b72..ae315dbd37322685d31a6e1e2772b6a656af1bc0 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -193,13 +193,20 @@ void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result, void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - u8 gate = hdev->pipes[pipe].gate; u8 status 
= NFC_HCI_ANY_OK; struct hci_create_pipe_resp *create_info; struct hci_delete_pipe_noti *delete_info; struct hci_all_pipe_cleared_noti *cleared_info; + u8 gate; - pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + pr_debug("from pipe %x cmd %x\n", pipe, cmd); + + if (pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + + gate = hdev->pipes[pipe].gate; switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: @@ -387,8 +394,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = hdev->pipes[pipe].gate; + u8 gate; + + if (pipe >= NFC_HCI_MAX_PIPES) { + pr_err("Discarded event %x to invalid pipe %x\n", event, pipe); + goto exit; + } + gate = hdev->pipes[pipe].gate; if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 7b8d4d235a3a14d19d7a47d53b3338668c2572c2..6199f4334fbd271c26726a482cc0bca74ebd9192 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -55,7 +55,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, + [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, + [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 69a50fad81dcb95aee22742822365105259b5e3d..42ee9a5bfc5c0358736668e738a2a0f80b4f4a08 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2204,6 +2204,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; __u32 ts_status; bool is_drop_n_account = false; + unsigned int slot_id = 0; bool do_vnet = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. 
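The nfc/hci hunks around here are a bounds check: the pipe index arrives from the device or peer and must be validated before it indexes the pipes[] table. A compilable sketch of the hardened lookup (NFC_HCI_MAX_PIPES assumed to be 127, matching the kernel header):

    #include <stdint.h>
    #include <stdio.h>

    #define NFC_HCI_MAX_PIPES    127
    #define NFC_HCI_INVALID_GATE 0xff

    struct pipe_info { uint8_t gate; };

    static int lookup_gate(const struct pipe_info *pipes, uint8_t pipe,
                           uint8_t *gate)
    {
        if (pipe >= NFC_HCI_MAX_PIPES)   /* reject out-of-range index */
            return -1;
        if (pipes[pipe].gate == NFC_HCI_INVALID_GATE)
            return -1;
        *gate = pipes[pipe].gate;
        return 0;
    }

    int main(void)
    {
        struct pipe_info pipes[NFC_HCI_MAX_PIPES] = { [3] = { .gate = 0x05 } };
        uint8_t gate;

        printf("pipe 3:   %s\n", lookup_gate(pipes, 3, &gate) ? "rejected" : "ok");
        printf("pipe 200: %s\n", lookup_gate(pipes, 200, &gate) ? "rejected" : "ok");
        return 0;
    }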
@@ -2299,6 +2300,20 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; + + if (po->tp_version <= TPACKET_V2) { + slot_id = po->rx_ring.head; + if (test_bit(slot_id, po->rx_ring.rx_owner_map)) + goto drop_n_account; + __set_bit(slot_id, po->rx_ring.rx_owner_map); + } + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2311,12 +2326,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, status |= TP_STATUS_LOSING; } - if (do_vnet && - virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) - goto drop_n_account; - po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; @@ -2404,7 +2413,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, #endif if (po->tp_version <= TPACKET_V2) { + spin_lock(&sk->sk_receive_queue.lock); __packet_set_status(po, h.raw, status); + __clear_bit(slot_id, po->rx_ring.rx_owner_map); + spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); } else { prb_clear_blk_fill_status(&po->rx_ring); @@ -4297,6 +4309,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); + unsigned long *rx_owner_map = NULL; int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; @@ -4382,6 +4395,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } break; default: + if (!tx_ring) { + rx_owner_map = bitmap_alloc(req->tp_frame_nr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!rx_owner_map) + goto out_free_pg_vec; + } break; } } @@ -4411,6 +4430,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); + if (po->tp_version <= TPACKET_V2) + swap(rb->rx_owner_map, rx_owner_map); rb->frame_max = (req->tp_frame_nr - 1); rb->head = 0; rb->frame_size = req->tp_frame_size; @@ -4442,6 +4463,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: + bitmap_free(rx_owner_map); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: diff --git a/net/packet/internal.h b/net/packet/internal.h index c70a2794456f15559a31de428f6fa6cc8604e090..f10294800aafb3673083acd87c21944e313d9a6a 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -70,7 +70,10 @@ struct packet_ring_buffer { unsigned int __percpu *pending_refcnt; - struct tpacket_kbdq_core prb_bdqc; + union { + unsigned long *rx_owner_map; + struct tpacket_kbdq_core prb_bdqc; + }; }; extern struct mutex fanout_mutex; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 15182efbbf4e04e38c9d6815b53e3ecce8783ddd..d838f5aca42a004fa3fa3eb1876abc60c96dddf4 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1540,20 +1540,21 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) node = NULL; srv_node = NULL; if (addr->sq_node == QRTR_NODE_BCAST) { - enqueue_fn = qrtr_bcast_enqueue; - if (addr->sq_port != QRTR_PORT_CTRL) { + if (addr->sq_port != QRTR_PORT_CTRL && + qrtr_local_nid != QRTR_NODE_BCAST) { release_sock(sk); - return -EINVAL; + return -ENOTCONN; } + enqueue_fn = qrtr_bcast_enqueue; } else if (addr->sq_node == ipc->us.sq_node) { enqueue_fn = qrtr_local_enqueue; } 
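The af_packet changes above close a window where userspace could flip a ring slot's status word back to TP_STATUS_KERNEL while the kernel was still writing into it, so the kernel now tracks in-flight slots in a private rx_owner_map bitmap. A minimal single-threaded sketch of the ownership idea (the kernel manipulates the bitmap under the receive-queue lock):

    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NR_SLOTS 64
    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    static unsigned long owner_map[NR_SLOTS / BITS_PER_LONG + 1];

    /* claim a slot before writing; refuse if it is still owned,
     * the way tpacket_rcv() now drops instead of reusing a slot */
    static bool claim_slot(unsigned int slot)
    {
        unsigned long bit = 1UL << (slot % BITS_PER_LONG);
        unsigned long *w = &owner_map[slot / BITS_PER_LONG];

        if (*w & bit)
            return false;
        *w |= bit;
        return true;
    }

    static void release_slot(unsigned int slot)
    {
        owner_map[slot / BITS_PER_LONG] &= ~(1UL << (slot % BITS_PER_LONG));
    }

    int main(void)
    {
        printf("first claim:   %d\n", claim_slot(5));   /* 1: granted */
        printf("double claim:  %d\n", claim_slot(5));   /* 0: refused */
        release_slot(5);                                /* status handed back */
        printf("after release: %d\n", claim_slot(5));   /* 1 again */
        return 0;
    }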
else { - enqueue_fn = qrtr_node_enqueue; node = qrtr_node_lookup(addr->sq_node); if (!node) { release_sock(sk); return -ECONNRESET; } + enqueue_fn = qrtr_node_enqueue; if (ipc->state > QRTR_STATE_INIT && ipc->state != node->nid) ipc->state = QRTR_STATE_MULTI; diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index e7f6b8823eb6e6c6794ed78e1197c1cf8aab4858..ad9d1b21cb0ba4ead61569fec8a125e30576e1c7 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -35,7 +35,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); -static long rxrpc_read(const struct key *, char __user *, size_t); +static long rxrpc_read(const struct key *, char *, size_t); /* * rxrpc defined keys take an arbitrary string as the description and an @@ -1044,12 +1044,12 @@ EXPORT_SYMBOL(rxrpc_get_null_key); * - this returns the result in XDR form */ static long rxrpc_read(const struct key *key, - char __user *buffer, size_t buflen) + char *buffer, size_t buflen) { const struct rxrpc_key_token *token; const struct krb5_principal *princ; size_t size; - __be32 __user *xdr, *oldxdr; + __be32 *xdr, *oldxdr; u32 cnlen, toksize, ntoks, tok, zero; u16 toksizes[AFSTOKEN_MAX]; int loop; @@ -1126,30 +1126,25 @@ static long rxrpc_read(const struct key *key, if (!buffer || buflen < size) return size; - xdr = (__be32 __user *) buffer; + xdr = (__be32 *)buffer; zero = 0; #define ENCODE(x) \ do { \ - __be32 y = htonl(x); \ - if (put_user(y, xdr++) < 0) \ - goto fault; \ + *xdr++ = htonl(x); \ } while(0) #define ENCODE_DATA(l, s) \ do { \ u32 _l = (l); \ ENCODE(l); \ - if (copy_to_user(xdr, (s), _l) != 0) \ - goto fault; \ - if (_l & 3 && \ - copy_to_user((u8 __user *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \ - goto fault; \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ xdr += (_l + 3) >> 2; \ } while(0) #define ENCODE64(x) \ do { \ __be64 y = cpu_to_be64(x); \ - if (copy_to_user(xdr, &y, 8) != 0) \ - goto fault; \ + memcpy(xdr, &y, 8); \ xdr += 8 >> 2; \ } while(0) #define ENCODE_STR(s) \ @@ -1240,8 +1235,4 @@ static long rxrpc_read(const struct key *key, ASSERTCMP((char __user *) xdr - buffer, ==, size); _leave(" = %zu", size); return size; - -fault: - _leave(" = -EFAULT"); - return -EFAULT; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1879665e5a2bcd567d0a3926d329d7ccf8836f8f..8974bd25c71e8b85f3fccb388176f1ea1f6baf6e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -160,6 +160,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (!atomic_read(&head->ht.nelems)) return -1; + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); fl_clear_masked_range(&skb_key, &head->mask); info = skb_tunnel_info(skb); @@ -445,6 +446,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, }; static void fl_set_key_val(struct nlattr **tb, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d8fd152779c8d451bfaa0b028f3e3a94b723731d..a985f91e8b476222aefa72df3c8e4a4df9f2cc67 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -136,6 +136,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle) static const struct nla_policy 
mall_policy[TCA_MATCHALL_MAX + 1] = { [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC }, [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 }, + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 }, }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index ac9a5b8825b9d572a0e60453fc6c68e82bdab320..4f133faa9e60d73808770358877732af3b02d29d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -539,8 +539,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, fp = &b->ht[h]; for (pfp = rtnl_dereference(*fp); pfp; fp = &pfp->next, pfp = rtnl_dereference(*fp)) { - if (pfp == f) { - *fp = f->next; + if (pfp == fold) { + rcu_assign_pointer(*fp, fold->next); break; } } diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 796b4e1beb12aa8df8ad880f3eaef9eda4929033..c2d2c054a4e495a7df305842e10345b5bb893e36 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -375,6 +375,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tcindex_alloc_perfect_hash(cp) < 0) goto errout; + cp->alloc_hash = cp->hash; for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 7a944f508cae82d07669db3a0219aff7fc424d40..66f1d40b910aca458ea1553a13b0be64ad59df11 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -695,6 +695,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 53a66ee1331f0915bf04969603e51c8cf3515577..18efb8cc46932735123fa41e7c3c6b56d0bf9ba7 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -235,7 +235,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, { struct sctp_association *asoc = t->asoc; struct dst_entry *dst = NULL; - struct flowi6 *fl6 = &fl->u.ip6; + struct flowi _fl; + struct flowi6 *fl6 = &_fl.u.ip6; struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; @@ -245,7 +246,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, enum sctp_scope scope; __u8 matchlen = 0; - memset(fl6, 0, sizeof(struct flowi6)); + memset(&_fl, 0, sizeof(_fl)); fl6->daddr = daddr->v6.sin6_addr; fl6->fl6_dport = daddr->v6.sin6_port; fl6->flowi6_proto = IPPROTO_SCTP; @@ -271,8 +272,11 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, fl6, final_p); - if (!asoc || saddr) + if (!asoc || saddr) { + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); goto out; + } bp = &asoc->base.bind_addr; scope = sctp_scope(daddr); @@ -295,6 +299,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if ((laddr->a.sa.sa_family == AF_INET6) && (sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) { rcu_read_unlock(); + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); goto out; } } @@ -333,6 +339,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (!IS_ERR_OR_NULL(dst)) dst_release(dst); dst = bdst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); break; } @@ -346,6 +354,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, 
dst_release(dst); dst = bdst; matchlen = bmatchlen; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); } rcu_read_unlock(); @@ -354,14 +364,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct rt6_info *rt; rt = (struct rt6_info *)dst; - t->dst = dst; t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, - &fl6->saddr); + &fl->u.ip6.saddr); } else { t->dst = NULL; - pr_debug("no route\n"); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bf39f317953afdf5bacadda70bbf881ceb57278d..785456df750518aacd7bdc03e1922d1c2a67bfee 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -435,14 +435,15 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, { struct sctp_association *asoc = t->asoc; struct rtable *rt; - struct flowi4 *fl4 = &fl->u.ip4; + struct flowi _fl; + struct flowi4 *fl4 = &_fl.u.ip4; struct sctp_bind_addr *bp; struct sctp_sockaddr_entry *laddr; struct dst_entry *dst = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; - memset(fl4, 0x0, sizeof(struct flowi4)); + memset(&_fl, 0x0, sizeof(_fl)); fl4->daddr = daddr->v4.sin_addr.s_addr; fl4->fl4_dport = daddr->v4.sin_port; fl4->flowi4_proto = IPPROTO_SCTP; @@ -460,8 +461,11 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, &fl4->saddr); rt = ip_route_output_key(sock_net(sk), fl4); - if (!IS_ERR(rt)) + if (!IS_ERR(rt)) { dst = &rt->dst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); + } /* If there is no association or if a source address is passed, no * more validation is required. @@ -524,27 +528,33 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr, false); if (!odev || odev->ifindex != fl4->flowi4_oif) { - if (!dst) + if (!dst) { dst = &rt->dst; - else + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); + } else { dst_release(&rt->dst); + } continue; } dst_release(dst); dst = &rt->dst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); break; } out_unlock: rcu_read_unlock(); out: - t->dst = dst; - if (dst) + if (dst) { pr_debug("rt_dst:%pI4, rt_src:%pI4\n", - &fl4->daddr, &fl4->saddr); - else + &fl->u.ip4.daddr, &fl->u.ip4.saddr); + } else { + t->dst = NULL; pr_debug("no route\n"); + } } /* For v4, the source address is cached in the route entry(dst). 
So no need diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 75274a60b77ab0f40903ae0ea38c633085b0a336..6a5a3dfa6c8d660fdd6b311d043776c7c51e7591 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -221,15 +221,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f67df16bd3409b6a4427b2d102ec4f25e0c2cae7..e698edd56bd56295f9155566e3b1f322d34fe1f8 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -858,7 +858,11 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, struct sctp_chunk *retval; __u32 ctsn; - ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); + if (chunk && chunk->asoc) + ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map); + else + ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); + shut.cum_tsn_ack = htonl(ctsn); retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a2e058127ef7a77b29afb9a6a7578c7ce670b06b..ba29d782af30d52c5f45df6b25cbcf4411243c8c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -182,6 +182,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, return true; } +/* Check for format error in an ABORT chunk */ +static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) +{ + struct sctp_errhdr *err; + + sctp_walk_errors(err, chunk->chunk_hdr); + + return (void *)err == (void *)chunk->chunk_end; +} + /********************************************************** * These are the state functions for handling chunk events. **********************************************************/ @@ -2202,6 +2212,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2245,6 +2258,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); @@ -2512,6 +2528,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2529,16 +2548,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( /* See if we have an error cause code in the chunk. 
*/ len = ntohs(chunk->chunk_hdr->length); - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { - struct sctp_errhdr *err; - - sctp_walk_errors(err, chunk->chunk_hdr); - if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, - commands); - + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((struct sctp_errhdr *)chunk->skb->data)->cause; - } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 09cda66d0567ac5f1f8c8691a51b665d78553639..442780515760e290915e0f61f2671b5ec2ef7b6a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -175,29 +175,44 @@ static void sctp_clear_owner_w(struct sctp_chunk *chunk) skb_orphan(chunk->skb); } +#define traverse_and_process() \ +do { \ + msg = chunk->msg; \ + if (msg == prev_msg) \ + continue; \ + list_for_each_entry(c, &msg->chunks, frag_list) { \ + if ((clear && asoc->base.sk == c->skb->sk) || \ + (!clear && asoc->base.sk != c->skb->sk)) \ + cb(c); \ + } \ + prev_msg = msg; \ +} while (0) + static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, + bool clear, void (*cb)(struct sctp_chunk *)) { + struct sctp_datamsg *msg, *prev_msg = NULL; struct sctp_outq *q = &asoc->outqueue; + struct sctp_chunk *chunk, *c; struct sctp_transport *t; - struct sctp_chunk *chunk; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) list_for_each_entry(chunk, &t->transmitted, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->retransmit, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->sacked, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->abandoned, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->out_chunk_list, list) - cb(chunk); + traverse_and_process(); } /* Verify that this is a valid address. */ @@ -8280,9 +8295,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * paths won't try to lock it and then oldsk. */ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); - sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); + sctp_for_each_tx_datachunk(assoc, true, sctp_clear_owner_w); sctp_assoc_migrate(assoc, newsk); - sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); + sctp_for_each_tx_datachunk(assoc, false, sctp_set_owner_w); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. 
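sctp_for_each_tx_datachunk() above now deduplicates by sctp_datamsg: the tx queues may hold several chunks of the same message, and during sctp_sock_migrate() the callback must run per chunk with the ownership test for the current phase, clearing only chunks still owned by the old socket and setting ownership only on chunks not yet owned by it. A rough standalone model of that filter, with invented struct and list names standing in for the kernel's:

    #include <stdbool.h>
    #include <stddef.h>

    struct sock { int id; };
    struct msg;
    struct chunk {
        struct msg *msg;           /* models chunk->msg */
        struct sock *owner;        /* models chunk->skb->sk */
        struct chunk *next_in_msg; /* models msg->chunks list */
        struct chunk *next_queued; /* models one of the tx queue lists */
    };
    struct msg { struct chunk *chunks; };

    /* Visit each datamsg once, even if several of its chunks sit on the
     * queue, and run cb only where the ownership test for this phase holds. */
    static void for_each_tx_datachunk(struct chunk *queue, struct sock *base_sk,
                                      bool clear, void (*cb)(struct chunk *))
    {
        struct msg *prev = NULL;
        struct chunk *c, *f;

        for (c = queue; c; c = c->next_queued) {
            if (c->msg == prev)
                continue;
            for (f = c->msg->chunks; f; f = f->next_in_msg)
                if ((clear && f->owner == base_sk) ||
                    (!clear && f->owner != base_sk))
                    cb(f);
            prev = c->msg;
        }
    }

    static void orphan(struct chunk *c)
    {
        c->owner = NULL; /* models sctp_clear_owner_w() */
    }

    int main(void)
    {
        struct sock sk = { 1 };
        struct msg m = { NULL };
        struct chunk a = { &m, &sk, NULL, NULL };

        m.chunks = &a;
        for_each_tx_datachunk(&a, &sk, true, orphan);
        return a.owner ? 1 : 0;
    }

The prev pointer mirrors prev_msg in the traverse_and_process() macro: consecutive queue entries from the same message are skipped, so each chunk's owner is touched exactly once.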
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf7022445d3d55f7d791b07f36655726d0d..576c37d860514b0d8014d97904791aa4bcb2c07d 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,15 +38,14 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; + sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; } diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 4410d007151542c8dfc12e57f2c942662b9958d3..7d89b0584944bf15f0dcda1a1c7a1f6e242c6d52 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -513,6 +513,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) struct smc_ib_device *smcibdev; smcibdev = ib_get_client_data(ibdev, &smc_ib_client); + if (!smcibdev || smcibdev->ibdev != ibdev) + return; ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index a9c0f368db5d27ed72159a2395a4a4ec60234ed0..24e18405cdb48fff6090831f656b4f883c926109 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -7,9 +7,13 @@ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct device *pdev = wiphy_dev(wdev->wiphy); - strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name, - sizeof(info->driver)); + if (pdev->driver) + strlcpy(info->driver, pdev->driver->name, + sizeof(info->driver)); + else + strlcpy(info->driver, "N/A", sizeof(info->driver)); strlcpy(info->version, init_utsname()->release, sizeof(info->version)); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ac976dd5fc07b9ec7f72f9c2db19cd4688e1f3ea..3a3b7cd9d9fb10ae1cf87005b312edb17ffb7190 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -321,6 +321,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, @@ -346,6 +347,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, + [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, + [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED }, @@ -394,6 +397,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, 
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, + [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, @@ -419,6 +424,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, + [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8692ef603e76f8a79c06407438cd66b00ea55f94..23a6f7ff2d4707d4f4f50c80534f8fe3368936e5 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1740,7 +1740,7 @@ static void handle_channel_custom(struct wiphy *wiphy, break; } - if (IS_ERR(reg_rule)) { + if (IS_ERR_OR_NULL(reg_rule)) { pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 39231237e1c3b3806dc956429195a59edf46bfd6..30f71620d4e3f3273fc5e682eee5fd3112acab55 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -120,8 +120,10 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, goto drop; } - if (!pskb_may_pull(skb, 1)) + if (!pskb_may_pull(skb, 1)) { + x25_neigh_put(nb); return 0; + } switch (skb->data[0]) { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index f1edb3584efc77b9d827f3e8665017fa62eb434c..f8a71a5b4507fe0f001cddf0b1980acb73c4a6d7 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -188,6 +188,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void return xfrm_dev_feat_change(dev); case NETDEV_DOWN: + case NETDEV_UNREGISTER: return xfrm_dev_down(dev); } return NOTIFY_DONE; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3607538387f81131f33eb10dde67e9a78d8d2418..5f5ad0dff5a40d8ba5b6a243c0de35cfb3ce82d7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -309,7 +309,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy); static void xfrm_policy_kill(struct xfrm_policy *policy) { + write_lock_bh(&policy->lock); policy->walk.dead = 1; + write_unlock_bh(&policy->lock); atomic_inc(&policy->genid); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ba152c510eb2b2850e07319c65827f334aed28b4..cf220e77a59dab743f1d950269aca7b90b6417b4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -109,7 +109,8 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs) return 0; uctx = nla_data(rt); - if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) + if (uctx->len > nla_len(rt) || + uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) return -EINVAL; return 0; @@ -2265,6 +2266,9 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, xfrm_mark_get(attrs, &mark); err = verify_newpolicy_info(&ua->policy); + if (err) + goto free_state; + err = verify_sec_ctx_len(attrs); if (err) goto free_state; diff --git a/scripts/Makefile b/scripts/Makefile index 25ab143cbe148b3fcd04432b77ceb929c0dc5d8f..c1d31dd3d760c67fd6f338ae40e27a314324ea6c 100644 --- a/scripts/Makefile +++ 
b/scripts/Makefile @@ -30,6 +30,8 @@ always := $(hostprogs-y) $(hostprogs-m) # The following hostprogs-y programs are only build on demand hostprogs-y += unifdef +extra-$(CONFIG_LTO_CLANG) += module-lto.lds + # These targets are used internally to avoid "is up to date" messages PHONY += build_unifdef build_unifdef: $(obj)/unifdef diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 8d5357053f8653d83e51f86d935341dd9f39f53d..486e135d3e30a57a9eb1d3b27139545161cc6df1 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -72,5 +72,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format) KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare) KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length) KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized) +KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) endif endif diff --git a/scripts/config b/scripts/config index e0e39826dae90ab735d1510e882bd32a9536e005..eee5b7f3a092ae4ac46bcdf92ffe8bb3c643aafd 100755 --- a/scripts/config +++ b/scripts/config @@ -7,6 +7,9 @@ myname=${0##*/} # If no prefix forced, use the default CONFIG_ CONFIG_="${CONFIG_-CONFIG_}" +# We use an uncommon delimiter for sed substitutions +SED_DELIM=$(echo -en "\001") + usage() { cat >&2 <"$tmpfile" + sed -e "s$SED_DELIM$before$SED_DELIM$after$SED_DELIM" "$infile" >"$tmpfile" # replace original file with the edited one mv "$tmpfile" "$infile" } diff --git a/scripts/dtc/dtc-lexer.l b/scripts/dtc/dtc-lexer.l index fd825ebba69cb25e160e057f53bef994970093fc..24af549977584c33b302a470fe90fd9b42e6539d 100644 --- a/scripts/dtc/dtc-lexer.l +++ b/scripts/dtc/dtc-lexer.l @@ -38,7 +38,6 @@ LINECOMMENT "//".*\n #include "srcpos.h" #include "dtc-parser.tab.h" -YYLTYPE yylloc; extern bool treesource_error; /* CAUTION: this will stop working if we ever use yyless() or yyunput() */ diff --git a/scripts/dtc/dtc-lexer.lex.c_shipped b/scripts/dtc/dtc-lexer.lex.c_shipped index 64c243772398d614d7cd0a6ec9cfd0d6baeb8da3..9db3a409c5071a2eda0981648978f999d6dc200d 100644 --- a/scripts/dtc/dtc-lexer.lex.c_shipped +++ b/scripts/dtc/dtc-lexer.lex.c_shipped @@ -631,7 +631,6 @@ char *yytext; #include "srcpos.h" #include "dtc-parser.tab.h" -YYLTYPE yylloc; extern bool treesource_error; /* CAUTION: this will stop working if we ever use yyless() or yyunput() */ diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 27aac273205bacf5456680a914a024bbf36960e0..fa423fcd1a9285b12c024b8829f26ace0f2a99a0 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -1238,7 +1238,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode) sym_calc_value(csym); if (mode == def_random) - has_changed = randomize_choice_values(csym); + has_changed |= randomize_choice_values(csym); else { set_all_choice_values(csym); has_changed = true; diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 959199c3147ec79e2797e0c87cf47b0e03723a25..29a1589e2f0bcef277e5a21fe7b7ad90f052ae83 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -6,6 +6,7 @@ ARCH=$2 SMP=$3 PREEMPT=$4 CC=$5 +LD=$6 vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } @@ -77,7 +78,10 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define LINUX_COMPILE_BY \"`echo $LINUX_COMPILE_BY | $UTS_TRUNCATE`\" echo \#define LINUX_COMPILE_HOST \"`echo $LINUX_COMPILE_HOST | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | grep ' version ' | sed 's/[[:space:]]*$//'`\" + CC_VERSION=$($CC -v 2>&1 | grep ' version ' | sed 's/[[:space:]]*$//') + 
LD_VERSION=$($LD -v | head -n1 | sed 's/(compatible with [^)]*)//' \ + | sed 's/[[:space:]]*$//') + printf '#define LINUX_COMPILER "%s"\n' "$CC_VERSION, $LD_VERSION" ) > .tmpcompile # Only replace the real compile.h if the new one is different, diff --git a/scripts/module-lto.lds b/scripts/module-lto.lds deleted file mode 100644 index f5ee544a877d13f72ca12882bc16dab7a3eaa0bf..0000000000000000000000000000000000000000 --- a/scripts/module-lto.lds +++ /dev/null @@ -1,22 +0,0 @@ -/* - * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and - * -ffunction-sections, which increases the size of the final module. - * Merge the split sections in the final binary. - */ -SECTIONS { - /* - * LLVM may emit .eh_frame with CONFIG_CFI_CLANG despite - * -fno-asynchronous-unwind-tables. Discard the section. - */ - /DISCARD/ : { - *(.eh_frame) - } - - .bss : { *(.bss .bss[.0-9a-zA-Z_]*) } - .data : { *(.data .data[.0-9a-zA-Z_]*) } - .rela.data : { *(.rela.data .rela.data[.0-9a-zA-Z_]*) } - .rela.rodata : { *(.rela.rodata .rela.rodata[.0-9a-zA-Z_]*) } - .rela.text : { *(.rela.text .rela.text[.0-9a-zA-Z_]*) } - .rodata : { *(.rodata .rodata[.0-9a-zA-Z_]*) } - .text : { *(.text .text[.0-9a-zA-Z_]*) } -} diff --git a/scripts/module-lto.lds.S b/scripts/module-lto.lds.S new file mode 100644 index 0000000000000000000000000000000000000000..c0f4fdeb84a08334f8ff413ddc833fc2c0bfc6ca --- /dev/null +++ b/scripts/module-lto.lds.S @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +/* + * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and + * -ffunction-sections, which increases the size of the final module. + * Merge the split sections in the final binary. + */ +SECTIONS { + /* + * LLVM may emit .eh_frame with CONFIG_CFI_CLANG despite + * -fno-asynchronous-unwind-tables. Discard the section. + */ + /DISCARD/ : { + *(.eh_frame) + } + + .bss : { *(.bss .bss.[0-9a-zA-Z_]*) } + .data : { *(.data .data.[0-9a-zA-Z_]*) } + .rela.data : { *(.rela.data .rela.data.[0-9a-zA-Z_]*) } + .rela.rodata : { *(.rela.rodata .rela.rodata.[0-9a-zA-Z_]*) } + .rela.text : { *(.rela.text .rela.text.[0-9a-zA-Z_]*) } + .rodata : { *(.rodata .rodata.[0-9a-zA-Z_]*) } + + /* + * With CFI_CLANG, ensure __cfi_check is at the beginning of the + * .text section, and that the section is aligned to page size. + */ + .text : ALIGN(PAGE_SIZE) { + *(.text.__cfi_check) + *(.text .text.[0-9a-zA-Z_]* .text..L.cfi*) + } + +} diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl old mode 100644 new mode 100755 diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 929e14978c421b227e592e937d9157ca2235b2f7..1957275ad2af38a1559baa67ca057301d8d78324 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -22,6 +22,13 @@ #include #include +struct big_key_buf { + unsigned int nr_pages; + void *virt; + struct scatterlist *sg; + struct page *pages[]; +}; + /* * Layout of key payload words. */ @@ -91,10 +98,9 @@ static DEFINE_MUTEX(big_key_aead_lock); /* * Encrypt/decrypt big_key data */ -static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) +static int big_key_crypt(enum big_key_op op, struct big_key_buf *buf, size_t datalen, u8 *key) { int ret; - struct scatterlist sgio; struct aead_request *aead_req; /* We always use a zero nonce. 
The reason we can get away with this is * because we're using a different randomly generated key for every @@ -109,8 +115,7 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) return -ENOMEM; memset(zero_nonce, 0, sizeof(zero_nonce)); - sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0)); - aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce); + aead_request_set_crypt(aead_req, buf->sg, buf->sg, datalen, zero_nonce); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); aead_request_set_ad(aead_req, 0); @@ -129,22 +134,82 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) return ret; } +/* + * Free up the buffer. + */ +static void big_key_free_buffer(struct big_key_buf *buf) +{ + unsigned int i; + + if (buf->virt) { + memset(buf->virt, 0, buf->nr_pages * PAGE_SIZE); + vunmap(buf->virt); + } + + for (i = 0; i < buf->nr_pages; i++) + if (buf->pages[i]) + __free_page(buf->pages[i]); + + kfree(buf); +} + +/* + * Allocate a buffer consisting of a set of pages with a virtual mapping + * applied over them. + */ +static void *big_key_alloc_buffer(size_t len) +{ + struct big_key_buf *buf; + unsigned int npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int i, l; + + buf = kzalloc(sizeof(struct big_key_buf) + + sizeof(struct page) * npg + + sizeof(struct scatterlist) * npg, + GFP_KERNEL); + if (!buf) + return NULL; + + buf->nr_pages = npg; + buf->sg = (void *)(buf->pages + npg); + sg_init_table(buf->sg, npg); + + for (i = 0; i < buf->nr_pages; i++) { + buf->pages[i] = alloc_page(GFP_KERNEL); + if (!buf->pages[i]) + goto nomem; + + l = min_t(size_t, len, PAGE_SIZE); + sg_set_page(&buf->sg[i], buf->pages[i], l, 0); + len -= l; + } + + buf->virt = vmap(buf->pages, buf->nr_pages, VM_MAP, PAGE_KERNEL); + if (!buf->virt) + goto nomem; + + return buf; + +nomem: + big_key_free_buffer(buf); + return NULL; +} + /* * Preparse a big key */ int big_key_preparse(struct key_preparsed_payload *prep) { + struct big_key_buf *buf; struct path *path = (struct path *)&prep->payload.data[big_key_path]; struct file *file; u8 *enckey; - u8 *data = NULL; ssize_t written; - size_t datalen = prep->datalen; + size_t datalen = prep->datalen, enclen = datalen + ENC_AUTHTAG_SIZE; int ret; - ret = -EINVAL; if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) - goto error; + return -EINVAL; /* Set an arbitrary quota */ prep->quotalen = 16; @@ -157,13 +222,12 @@ int big_key_preparse(struct key_preparsed_payload *prep) * * File content is stored encrypted with randomly generated key. 
*/ - size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; - memcpy(data, prep->data, datalen); + memcpy(buf->virt, prep->data, datalen); /* generate random key */ enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL); @@ -176,7 +240,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; /* encrypt aligned data */ - ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey); + ret = big_key_crypt(BIG_KEY_ENC, buf, datalen, enckey); if (ret) goto err_enckey; @@ -187,7 +251,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; } - written = kernel_write(file, data, enclen, &pos); + written = kernel_write(file, buf->virt, enclen, &pos); if (written != enclen) { ret = written; if (written >= 0) @@ -202,7 +266,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) *path = file->f_path; path_get(path); fput(file); - kzfree(data); + big_key_free_buffer(buf); } else { /* Just store the data in a buffer */ void *data = kmalloc(datalen, GFP_KERNEL); @@ -220,7 +284,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) err_enckey: kzfree(enckey); error: - kzfree(data); + big_key_free_buffer(buf); return ret; } @@ -289,7 +353,7 @@ void big_key_describe(const struct key *key, struct seq_file *m) * read the key data * - the key's semaphore is read-locked */ -long big_key_read(const struct key *key, char __user *buffer, size_t buflen) +long big_key_read(const struct key *key, char *buffer, size_t buflen) { size_t datalen = (size_t)key->payload.data[big_key_len]; long ret; @@ -298,15 +362,15 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) return datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { + struct big_key_buf *buf; struct path *path = (struct path *)&key->payload.data[big_key_path]; struct file *file; - u8 *data; u8 *enckey = (u8 *)key->payload.data[big_key_data]; size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; file = dentry_open(path, O_RDONLY, current_cred()); @@ -316,31 +380,28 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) } /* read file to kernel and decrypt */ - ret = kernel_read(file, data, enclen, &pos); + ret = kernel_read(file, buf->virt, enclen, &pos); if (ret >= 0 && ret != enclen) { ret = -EIO; goto err_fput; } - ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey); + ret = big_key_crypt(BIG_KEY_DEC, buf, enclen, enckey); if (ret) goto err_fput; ret = datalen; - /* copy decrypted data to user */ - if (copy_to_user(buffer, data, datalen) != 0) - ret = -EFAULT; + /* copy out decrypted data */ + memcpy(buffer, buf->virt, datalen); err_fput: fput(file); error: - kzfree(data); + big_key_free_buffer(buf); } else { ret = datalen; - if (copy_to_user(buffer, key->payload.data[big_key_data], - datalen) != 0) - ret = -EFAULT; + memcpy(buffer, key->payload.data[big_key_data], datalen); } return ret; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index d92cbf9687c33f090865f6d0caa99d9936f49e3a..571f6d486838ec2af8af5fecc6928ac7b5fcd785 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -895,14 +895,14 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) } /* - * encrypted_read - format and copy the encrypted 
data to userspace + * encrypted_read - format and copy out the encrypted data * * The resulting datablob format is: * * * On success, return to userspace the encrypted key datablob size. */ -static long encrypted_read(const struct key *key, char __user *buffer, +static long encrypted_read(const struct key *key, char *buffer, size_t buflen) { struct encrypted_key_payload *epayload; @@ -950,8 +950,7 @@ static long encrypted_read(const struct key *key, char __user *buffer, key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); - if (copy_to_user(buffer, ascii_buf, asciiblob_len) != 0) - ret = -EFAULT; + memcpy(buffer, ascii_buf, asciiblob_len); kzfree(ascii_buf); return asciiblob_len; diff --git a/security/keys/internal.h b/security/keys/internal.h index e3a573840186667bed09d8e1c790245abec34e67..7ed723d858499ea9cbe0a580e3fa2ea459f400f3 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include struct iovec; @@ -158,8 +160,6 @@ extern struct key *request_key_and_link(struct key_type *type, extern bool lookup_user_key_possessed(const struct key *key, const struct key_match_data *match_data); -extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, - key_perm_t perm); #define KEY_LOOKUP_CREATE 0x01 #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 @@ -305,4 +305,14 @@ static inline void key_check(const struct key *key) #endif +/* + * Helper function to clear and free a kvmalloc'ed memory object. + */ +static inline void __kvzfree(const void *addr, size_t len) +{ + if (addr) { + memset((void *)addr, 0, len); + kvfree(addr); + } +} #endif /* _INTERNAL_H */ diff --git a/security/keys/key.c b/security/keys/key.c index 17244f5f54c6987cafdb656b62a65d54e15dbb68..5f4cb271464a06bbb3a46fb33ed746d46dae4e5a 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -383,7 +383,7 @@ int key_payload_reserve(struct key *key, size_t datalen) spin_lock(&key->user->lock); if (delta > 0 && - (key->user->qnbytes + delta >= maxbytes || + (key->user->qnbytes + delta > maxbytes || key->user->qnbytes + delta < key->user->qnbytes)) { ret = -EDQUOT; } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index ca31af186abd8a71aa2721d06ede9e7f30970974..c07c2e2b2478399328d3d8b445adc2d837079257 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -330,7 +330,7 @@ long keyctl_update_key(key_serial_t id, payload = NULL; if (plen) { ret = -ENOMEM; - payload = kmalloc(plen, GFP_KERNEL); + payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error; @@ -351,7 +351,7 @@ long keyctl_update_key(key_serial_t id, key_ref_put(key_ref); error2: - kzfree(payload); + __kvzfree(payload, plen); error: return ret; } @@ -742,6 +742,21 @@ long keyctl_keyring_search(key_serial_t ringid, return ret; } +/* + * Call the read method + */ +static long __keyctl_read_key(struct key *key, char *buffer, size_t buflen) +{ + long ret; + + down_read(&key->sem); + ret = key_validate(key); + if (ret == 0) + ret = key->type->read(key, buffer, buflen); + up_read(&key->sem); + return ret; +} + /* * Read a key's payload. 
* @@ -757,26 +772,28 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) struct key *key; key_ref_t key_ref; long ret; + char *key_data = NULL; + size_t key_data_len; /* find the key first */ key_ref = lookup_user_key(keyid, 0, 0); if (IS_ERR(key_ref)) { ret = -ENOKEY; - goto error; + goto out; } key = key_ref_to_ptr(key_ref); ret = key_read_state(key); if (ret < 0) - goto error2; /* Negatively instantiated */ + goto key_put_out; /* Negatively instantiated */ /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); if (ret == 0) goto can_read_key; if (ret != -EACCES) - goto error2; + goto key_put_out; /* we can't; see if it's searchable from this process's keyrings * - we automatically take account of the fact that it may be @@ -784,26 +801,78 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) */ if (!is_key_possessed(key_ref)) { ret = -EACCES; - goto error2; + goto key_put_out; } /* the key is probably readable - now try to read it */ can_read_key: - ret = -EOPNOTSUPP; - if (key->type->read) { - /* Read the data with the semaphore held (since we might sleep) - * to protect against the key being updated or revoked. + if (!key->type->read) { + ret = -EOPNOTSUPP; + goto key_put_out; + } + + if (!buffer || !buflen) { + /* Get the key length from the read method */ + ret = __keyctl_read_key(key, NULL, 0); + goto key_put_out; + } + + /* + * Read the data with the semaphore held (since we might sleep) + * to protect against the key being updated or revoked. + * + * Allocating a temporary buffer to hold the keys before + * transferring them to user buffer to avoid potential + * deadlock involving page fault and mmap_sem. + * + * key_data_len = (buflen <= PAGE_SIZE) + * ? buflen : actual length of key data + * + * This prevents allocating arbitrary large buffer which can + * be much larger than the actual key length. In the latter case, + * at least 2 passes of this loop is required. + */ + key_data_len = (buflen <= PAGE_SIZE) ? buflen : 0; + for (;;) { + if (key_data_len) { + key_data = kvmalloc(key_data_len, GFP_KERNEL); + if (!key_data) { + ret = -ENOMEM; + goto key_put_out; + } + } + + ret = __keyctl_read_key(key, key_data, key_data_len); + + /* + * Read methods will just return the required length without + * any copying if the provided length isn't large enough. + */ + if (ret <= 0 || ret > buflen) + break; + + /* + * The key may change (unlikely) in between 2 consecutive + * __keyctl_read_key() calls. In this case, we reallocate + * a larger buffer and redo the key read when + * key_data_len < ret <= buflen. 
*/ - down_read(&key->sem); - ret = key_validate(key); - if (ret == 0) - ret = key->type->read(key, buffer, buflen); - up_read(&key->sem); + if (ret > key_data_len) { + if (unlikely(key_data)) + __kvzfree(key_data, key_data_len); + key_data_len = ret; + continue; /* Allocate buffer */ + } + + if (copy_to_user(buffer, key_data, ret)) + ret = -EFAULT; + break; } + __kvzfree(key_data, key_data_len); -error2: +key_put_out: key_put(key); -error: +out: return ret; } @@ -882,8 +951,8 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) key_quota_root_maxbytes : key_quota_maxbytes; spin_lock(&newowner->lock); - if (newowner->qnkeys + 1 >= maxkeys || - newowner->qnbytes + key->quotalen >= maxbytes || + if (newowner->qnkeys + 1 > maxkeys || + newowner->qnbytes + key->quotalen > maxbytes || newowner->qnbytes + key->quotalen < newowner->qnbytes) goto quota_overrun; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 359b9cba3d0de894c7914c41b7a48b4213259a8f..f7cf371bcd2a8588f56e059ba6ab548c9d60063b 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -432,7 +432,6 @@ static int keyring_read_iterator(const void *object, void *data) { struct keyring_read_iterator_context *ctx = data; const struct key *key = keyring_ptr_to_key(object); - int ret; kenter("{%s,%d},,{%zu/%zu}", key->type->name, key->serial, ctx->count, ctx->buflen); @@ -440,10 +439,7 @@ static int keyring_read_iterator(const void *object, void *data) if (ctx->count >= ctx->buflen) return 1; - ret = put_user(key->serial, ctx->buffer); - if (ret < 0) - return ret; - ctx->buffer++; + *ctx->buffer++ = key->serial; ctx->count += sizeof(key->serial); return 0; } diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 5f2993ab2d504cf1a1415ae91233d0d7643383bb..8968a3db1add914b3f3037f5c744d4f4a4acf354 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -756,6 +756,7 @@ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, put_cred(ctx.cred); goto try_again; } +EXPORT_SYMBOL(lookup_user_key); /* * Join the named keyring as the session keyring if possible else attempt to diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 1d34b2a5f485e2380e8b092cfb62955434c3f908..13ac3b1e57da61ea6bb8af8f46c875fe61e38a7d 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -27,7 +27,7 @@ static int request_key_auth_instantiate(struct key *, static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); -static long request_key_auth_read(const struct key *, char __user *, size_t); +static long request_key_auth_read(const struct key *, char *, size_t); /* * The request-key authorisation key type definition. 
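The key-type read methods in this series (rxrpc, big_key, encrypted, trusted, user, request_key_auth) all switch from copy_to_user() under the key semaphore to plain kernel-buffer writes; keyctl_read_key() now performs the userspace copy itself, outside key->sem, to avoid the page-fault/mmap_sem deadlock its comment describes. A simplified userspace model of that probe-allocate-retry loop, where read_key() stands in for __keyctl_read_key() and is not a real API:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* Stand-in for __keyctl_read_key(): returns the payload length and,
     * if buf is non-NULL, copies at most cap bytes into it. */
    static long read_key(char *buf, size_t cap)
    {
        static const char payload[] = "example-key-payload";

        if (buf)
            memcpy(buf, payload,
                   cap < sizeof(payload) ? cap : sizeof(payload));
        return sizeof(payload);
    }

    static long read_key_to_user(char *ubuf, size_t buflen)
    {
        size_t cap = buflen <= PAGE_SIZE ? buflen : 0;
        char *tmp = NULL;
        long ret;

        for (;;) {
            if (cap) {
                tmp = malloc(cap);
                if (!tmp)
                    return -ENOMEM;
            }
            ret = read_key(tmp, cap);
            if (ret <= 0 || (size_t)ret > buflen)
                break;               /* error, or just report required size */
            if ((size_t)ret > cap) { /* first probe, or the key grew: retry */
                free(tmp);
                tmp = NULL;
                cap = ret;
                continue;
            }
            memcpy(ubuf, tmp, ret);  /* the kernel does copy_to_user() here */
            break;
        }
        free(tmp);
        return ret;
    }

    int main(void)
    {
        char out[64];
        long n = read_key_to_user(out, sizeof(out));

        if (n > 0)
            printf("read %ld bytes: %.*s\n", n, (int)n, out);
        return 0;
    }

Capping the first allocation at PAGE_SIZE keeps an attacker-chosen buflen from forcing one huge allocation; an oversized key simply costs a second pass.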
@@ -85,7 +85,7 @@ static void request_key_auth_describe(const struct key *key, * - the key's semaphore is read-locked */ static long request_key_auth_read(const struct key *key, - char __user *buffer, size_t buflen) + char *buffer, size_t buflen) { struct request_key_auth *rka = get_request_key_auth(key); size_t datalen; @@ -102,8 +102,7 @@ static long request_key_auth_read(const struct key *key, if (buflen > datalen) buflen = datalen; - if (copy_to_user(buffer, rka->callout_info, buflen) != 0) - ret = -EFAULT; + memcpy(buffer, rka->callout_info, buflen); } return ret; diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 98aa89ff7bfd9ed57662116ca455f3a5628ec380..01e8544f79a539b7ded48c9c17fc88348f9e6bac 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1136,11 +1136,10 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) * trusted_read - copy the sealed blob data to userspace in hex. * On success, return to userspace the trusted key datablob size. */ -static long trusted_read(const struct key *key, char __user *buffer, +static long trusted_read(const struct key *key, char *buffer, size_t buflen) { const struct trusted_key_payload *p; - char *ascii_buf; char *bufp; int i; @@ -1149,18 +1148,9 @@ static long trusted_read(const struct key *key, char __user *buffer, return -EINVAL; if (buffer && buflen >= 2 * p->blob_len) { - ascii_buf = kmalloc(2 * p->blob_len, GFP_KERNEL); - if (!ascii_buf) - return -ENOMEM; - - bufp = ascii_buf; + bufp = buffer; for (i = 0; i < p->blob_len; i++) bufp = hex_byte_pack(bufp, p->blob[i]); - if (copy_to_user(buffer, ascii_buf, 2 * p->blob_len) != 0) { - kzfree(ascii_buf); - return -EFAULT; - } - kzfree(ascii_buf); } return 2 * p->blob_len; } diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 9f558bedba23a338da5980ab11dd1b716c7b0b67..0e723b676aef1c7e5bfcb4abf6d75e531513b2da 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -172,7 +172,7 @@ EXPORT_SYMBOL_GPL(user_describe); * read the key data * - the key's semaphore is read-locked */ -long user_read(const struct key *key, char __user *buffer, size_t buflen) +long user_read(const struct key *key, char *buffer, size_t buflen) { const struct user_key_payload *upayload; long ret; @@ -185,8 +185,7 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen) if (buflen > upayload->datalen) buflen = upayload->datalen; - if (copy_to_user(buffer, upayload->data, buflen) != 0) - ret = -EFAULT; + memcpy(buffer, upayload->data, buflen); } return ret; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 84f89ab1f2b594cbb84a29dcc8acc1df3754cefb..21f5c139988d63e14c840f9683a9be280a3d7f74 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5279,40 +5279,60 @@ static int selinux_tun_dev_open(void *security) static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) { - int err = 0; - u32 perm; + int rc = 0; + unsigned int msg_len; + unsigned int data_len = skb->len; + unsigned char *data = skb->data; struct nlmsghdr *nlh; struct sk_security_struct *sksec = sk->sk_security; + u16 sclass = sksec->sclass; + u32 perm; - if (skb->len < NLMSG_HDRLEN) { - err = -EINVAL; - goto out; - } - nlh = nlmsg_hdr(skb); + while (data_len >= nlmsg_total_size(0)) { + nlh = (struct nlmsghdr *)data; - err = selinux_nlmsg_lookup(sksec->sclass, nlh->nlmsg_type, &perm); - if (err) { - if (err == -EINVAL) { + /* NOTE: the nlmsg_len field isn't reliably set by some netlink + 
* users which means we can't reject skb's with bogus + * length fields; our solution is to follow what + * netlink_rcv_skb() does and simply skip processing at + * messages with length fields that are clearly junk + */ + if (nlh->nlmsg_len < NLMSG_HDRLEN || nlh->nlmsg_len > data_len) + return 0; + + rc = selinux_nlmsg_lookup(sclass, nlh->nlmsg_type, &perm); + if (rc == 0) { + rc = sock_has_perm(sk, perm); + if (rc) + return rc; + } else if (rc == -EINVAL) { + /* -EINVAL is a missing msg/perm mapping */ + pr_warn_ratelimited("SELinux: unrecognized netlink" - " message: protocol=%hu nlmsg_type=%hu sclass=%s" - " pig=%d comm=%s\n", - sk->sk_protocol, nlh->nlmsg_type, - secclass_map[sksec->sclass - 1].name, - task_pid_nr(current), current->comm); - if (!enforcing_enabled(&selinux_state) || - security_get_allow_unknown(&selinux_state)) - err = 0; + " message: protocol=%hu nlmsg_type=%hu sclass=%s" + " pid=%d comm=%s\n", + sk->sk_protocol, nlh->nlmsg_type, + secclass_map[sclass - 1].name, + task_pid_nr(current), current->comm); + if (enforcing_enabled(&selinux_state) && + !security_get_allow_unknown(&selinux_state)) + return rc; + rc = 0; + } else if (rc == -ENOENT) { + /* -ENOENT is a missing socket/class mapping, ignore */ + rc = 0; + } else { + return rc; + } - /* Ignore */ - if (err == -ENOENT) - err = 0; - goto out; + /* move to the next message after applying netlink padding */ + msg_len = NLMSG_ALIGN(nlh->nlmsg_len); + if (msg_len >= data_len) + return 0; + data_len -= msg_len; + data += msg_len; } - err = sock_has_perm(sk, perm); -out: - return err; + return rc; } #ifdef CONFIG_NETFILTER diff --git a/sound/core/control.c b/sound/core/control.c index 36571cd49be338131465b2188a680cd74efb82a2..a0ce22164957c3b6baa497cf1bfdf5a71551b02c 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1467,8 +1467,9 @@ static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, if (kctl->tlv.c == NULL) return -ENXIO; - /* When locked, this is unavailable. */ - if (vd->owner != NULL && vd->owner != file) + /* Write and command operations are not allowed for locked element.
*/ + if (op_flag != SNDRV_CTL_TLV_OP_READ && + vd->owner != NULL && vd->owner != file) return -EPERM; return kctl->tlv.c(kctl, op_flag, size, buf); diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index b8ab46b8298ded7d45a094b728d3e393a4348804..c1315ce98b54831fd0af53c711484dec916cb034 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->next) { if (plugin->dst_frames) frames = plugin->dst_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->next; err = snd_pcm_plugin_alloc(plugin, frames); @@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->prev) { if (plugin->src_frames) frames = plugin->src_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->prev; err = snd_pcm_plugin_alloc(plugin, frames); @@ -196,7 +196,9 @@ int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin) return 0; } -snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_pcm_uframes_t drv_frames) +static snd_pcm_sframes_t plug_client_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t drv_frames, + bool check_size) { struct snd_pcm_plugin *plugin, *plugin_prev, *plugin_next; int stream; @@ -212,12 +214,18 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p plugin_prev = plugin->prev; if (plugin->src_frames) drv_frames = plugin->src_frames(plugin, drv_frames); + if (check_size && plugin->buf_frames && + drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin = plugin_prev; } } else if (stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_first(plug); while (plugin && drv_frames > 0) { plugin_next = plugin->next; + if (check_size && plugin->buf_frames && + drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; if (plugin->dst_frames) drv_frames = plugin->dst_frames(plugin, drv_frames); plugin = plugin_next; @@ -227,7 +235,9 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p return drv_frames; } -snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pcm_uframes_t clt_frames) +static snd_pcm_sframes_t plug_slave_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t clt_frames, + bool check_size) { struct snd_pcm_plugin *plugin, *plugin_prev, *plugin_next; snd_pcm_sframes_t frames; @@ -243,6 +253,9 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc plugin = snd_pcm_plug_first(plug); while (plugin && frames > 0) { plugin_next = plugin->next; + if (check_size && plugin->buf_frames && + frames > plugin->buf_frames) + frames = plugin->buf_frames; if (plugin->dst_frames) { frames = plugin->dst_frames(plugin, frames); if (frames < 0) @@ -259,6 +272,9 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc if (frames < 0) return frames; } + if (check_size && plugin->buf_frames && + frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin = plugin_prev; } } else @@ -266,6 +282,18 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc return frames; } +snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t drv_frames) +{ + return 
plug_client_size(plug, drv_frames, false); +} + +snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, + snd_pcm_uframes_t clt_frames) +{ + return plug_slave_size(plug, clt_frames, false); +} + static int snd_pcm_plug_formats(const struct snd_mask *mask, snd_pcm_format_t format) { @@ -621,7 +649,7 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st src_channels = dst_channels; plugin = next; } - return snd_pcm_plug_client_size(plug, frames); + return plug_client_size(plug, frames, true); } snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, struct snd_pcm_plugin_channel *dst_channels_final, snd_pcm_uframes_t size) @@ -631,7 +659,7 @@ snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, str snd_pcm_sframes_t frames = size; int err; - frames = snd_pcm_plug_slave_size(plug, frames); + frames = plug_slave_size(plug, frames, true); if (frames < 0) return frames; diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 9debd1b8fd2880fde1e0fe349a4745bdb443b477..cdfb8f92d5545c215ddd477abdf43f660db151f3 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -615,6 +615,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq len = snd_seq_oss_timer_start(dp->timer); if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev); + snd_midi_event_reset_decode(mdev->coder); } else { len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev); if (len > 0) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 92b0d4523a07a3b9d5af03da563182f94d9bdbda..6fe93d5f6f7142db546365aafffd3073a1a2d67d 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -564,7 +564,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event, event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { - event->time.time = snd_seq_timer_get_cur_time(q->timer); + event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); @@ -1639,7 +1639,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; - status->time = snd_seq_timer_get_cur_time(tmr); + status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 1a6dc4ff44a69bb53e4b8ab9009b957ea7fd8689..ea1aa079627618ffae4ca3924a15c941bf40e06f 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -261,6 +261,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; + snd_seq_tick_time_t cur_tick; + snd_seq_real_time_t cur_time; if (q == NULL) return; @@ -277,17 +279,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) __again: /* Process tick queue... */ + cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { - cell = snd_seq_prioq_cell_out(q->tickq, - &q->timer->tick.cur_tick); + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); } /* Process time queue... 
*/ + cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); @@ -415,6 +418,7 @@ int snd_seq_queue_check_access(int queueid, int client) int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); + unsigned long flags; if (q == NULL) return -EINVAL; @@ -424,8 +428,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked) return -EPERM; } + spin_lock_irqsave(&q->owner_lock, flags); q->locked = locked ? 1 : 0; q->owner = client; + spin_unlock_irqrestore(&q->owner_lock, flags); queue_access_unlock(q); queuefree(q); @@ -564,15 +570,17 @@ void snd_seq_queue_client_termination(int client) unsigned long flags; int i; struct snd_seq_queue *q; + bool matched; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) continue; spin_lock_irqsave(&q->owner_lock, flags); - if (q->owner == client) + matched = (q->owner == client); + if (matched) q->klocked = 1; spin_unlock_irqrestore(&q->owner_lock, flags); - if (q->owner == client) { + if (matched) { if (q->timer->running) snd_seq_timer_stop(q->timer); snd_seq_timer_reset(q->timer); @@ -764,6 +772,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; + bool locked; + int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) @@ -775,9 +785,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, else bpm = 0; + spin_lock_irq(&q->owner_lock); + locked = q->locked; + owner = q->owner; + spin_unlock_irq(&q->owner_lock); + snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); - snd_iprintf(buffer, "owned by client : %d\n", q->owner); - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free"); + snd_iprintf(buffer, "owned by client : %d\n", owner); + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 0e1feb597586fbcab9278b4ed53367a294071c7d..bd5e5a5d52a8d459cc86a30e3c770a433918d9ad 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -436,14 +436,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr) } /* return current 'real' time. use timeofday() to get better granularity. 
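The check_queue() rework above takes one locked snapshot of the queue's tick and real time and drains both priority queues against those copies, instead of dereferencing q->timer->cur_time while the timer callback may still be updating it; the snd_seq_timer_get_cur_tick() change just below adds the missing locking on the reader side. A minimal user-space sketch of that pattern, assuming hypothetical names and a pthread mutex standing in for the kernel spinlock:

#include <pthread.h>
#include <stdio.h>

struct seq_timer {
	pthread_mutex_t lock;     /* stands in for the kernel spinlock */
	unsigned long cur_tick;   /* advanced by the timer callback */
};

/* Locked snapshot of the shared counter, as the getter below now does. */
static unsigned long timer_get_cur_tick(struct seq_timer *tmr)
{
	unsigned long tick;

	pthread_mutex_lock(&tmr->lock);
	tick = tmr->cur_tick;
	pthread_mutex_unlock(&tmr->lock);
	return tick;
}

/* Drain events against one consistent snapshot instead of re-reading the
 * shared field on every iteration. */
static void drain_queue(struct seq_timer *tmr)
{
	unsigned long cur_tick = timer_get_cur_tick(tmr);
	unsigned long next_event_tick = 0;   /* head of an imaginary queue */

	while (next_event_tick <= cur_tick) {
		/* ... dispatch one event, advance next_event_tick,
		 * break when the queue runs empty ... */
		break;
	}
}

int main(void)
{
	struct seq_timer tmr = { PTHREAD_MUTEX_INITIALIZER, 42 };

	drain_queue(&tmr);
	printf("tick snapshot: %lu\n", timer_get_cur_tick(&tmr));
	return 0;
}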
*/ -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime) { snd_seq_real_time_t cur_time; unsigned long flags; spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; - if (tmr->running) { + if (adjust_ktime && tmr->running) { struct timespec64 tm; ktime_get_ts64(&tm); @@ -460,7 +461,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) high PPQ values) */ snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr) { - return tmr->tick.cur_tick; + snd_seq_tick_time_t cur_tick; + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); + cur_tick = tmr->tick.cur_tick; + spin_unlock_irqrestore(&tmr->lock, flags); + return cur_tick; } diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h index 9506b661fe5bfcf056c97799a39e1ba9dec8f029..5d47d559465e6b3f8211fc602b509368546a4d16 100644 --- a/sound/core/seq/seq_timer.h +++ b/sound/core/seq/seq_timer.h @@ -135,7 +135,8 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq); int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position); int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position); int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base); -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr); +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime); snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr); extern int seq_default_timer_class; diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 8ebbca554e995f47eff787d0ffd36fc244abf7d8..dd958d76ca5de2939433d89866ee9761b813e014 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -95,6 +95,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) continue; snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); + snd_midi_event_reset_decode(vmidi->parser); } else { len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev); if (len > 0) diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c index f21633cd9b38ea9a8f6a3c955f9209a8f66ecf2f..acbe61b8db7b26b9fa535fb41f3845b612ac0e19 100644 --- a/sound/hda/hdmi_chmap.c +++ b/sound/hda/hdmi_chmap.c @@ -249,7 +249,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) { if (spk_alloc & (1 << i)) - j += snprintf(buf + j, buflen - j, " %s", + j += scnprintf(buf + j, buflen - j, " %s", cea_speaker_allocation_names[i]); } buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 8894c7c18ad673c8bcebc698f5839e336b68d30d..d92c3c6b6051deb47c1f6958339ebd299fd78e5b 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -875,10 +875,13 @@ static void snd_miro_write(struct snd_miro *chip, unsigned char reg, spin_unlock_irqrestore(&chip->lock, flags); } +static inline void snd_miro_write_mask(struct snd_miro *chip, + unsigned char reg, unsigned char value, unsigned char mask) +{ + unsigned char oldval = snd_miro_read(chip, reg); -#define snd_miro_write_mask(chip, reg, value, mask) \ - snd_miro_write(chip, reg, \ - 
(snd_miro_read(chip, reg) & ~(mask)) | ((value) & (mask))) + snd_miro_write(chip, reg, (oldval & ~mask) | (value & mask)); +} /* * Proc Interface diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 505cd81e19fa5a40be7d5a9302bb1c0c7a867d00..4ef3caaf4354b5cf62c16b36f9f009d9096b5d0e 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -327,10 +327,13 @@ static void snd_opti9xx_write(struct snd_opti9xx *chip, unsigned char reg, } -#define snd_opti9xx_write_mask(chip, reg, value, mask) \ - snd_opti9xx_write(chip, reg, \ - (snd_opti9xx_read(chip, reg) & ~(mask)) | ((value) & (mask))) +static inline void snd_opti9xx_write_mask(struct snd_opti9xx *chip, + unsigned char reg, unsigned char value, unsigned char mask) +{ + unsigned char oldval = snd_opti9xx_read(chip, reg); + snd_opti9xx_write(chip, reg, (oldval & ~mask) | (value & mask)); +} static int snd_opti9xx_configure(struct snd_opti9xx *chip, long port, diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c index c397e7da0eacf9808d136ccdb8ef40e583e58b19..7ccfb09535e14acf46193a4423fa38d4f9893976 100644 --- a/sound/pci/hda/hda_beep.c +++ b/sound/pci/hda/hda_beep.c @@ -310,8 +310,12 @@ int snd_hda_mixer_amp_switch_get_beep(struct snd_kcontrol *kcontrol, { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); struct hda_beep *beep = codec->beep; + int chs = get_amp_channels(kcontrol); + if (beep && (!beep->enabled || !ctl_has_mute(kcontrol))) { - ucontrol->value.integer.value[0] = + if (chs & 1) + ucontrol->value.integer.value[0] = beep->enabled; + if (chs & 2) ucontrol->value.integer.value[1] = beep->enabled; return 0; } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e3f3351da480b2ed64fe08cfe1e40a86593c7e25..7d65fe31c825731279790774aa8692ec51b1866d 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -942,6 +942,7 @@ int snd_hda_codec_new(struct hda_bus *bus, struct snd_card *card, /* power-up all before initialization */ hda_set_power_state(codec, AC_PWRST_D0); + codec->core.dev.power.power_state = PMSG_ON; snd_hda_codec_proc_new(codec); @@ -4002,7 +4003,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++) if (pcm & (AC_SUPPCM_BITS_8 << i)) - j += snprintf(buf + j, buflen - j, " %d", bits[i]); + j += scnprintf(buf + j, buflen - j, " %d", bits[i]); buf[j] = '\0'; /* necessary when j == 0 */ } diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index ba7fe9b6655c1f25279e5489081f34327f215d45..864cc8c9ada0ae6112445d3a6622f337067e778f 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -373,7 +373,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++) if (pcm & (1 << i)) - j += snprintf(buf + j, buflen - j, " %d", + j += scnprintf(buf + j, buflen - j, " %d", alsa_rates[i]); buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 890793ad85ca13b627dbfb5529045c6923793d8e..7779f54607156d7c5e36a412fe3752b72cfe1f63 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2034,24 +2034,15 @@ static void azx_firmware_cb(const struct firmware *fw, void *context) { struct snd_card *card = context; struct azx *chip = card->private_data; - struct pci_dev *pci = chip->pci; - - if (!fw) { - dev_err(card->dev, "Cannot load firmware, aborting\n"); - goto error; - } - chip->fw = fw; + if (fw) + chip->fw = fw; 
+ else + dev_err(card->dev, "Cannot load firmware, continue without patching\n"); if (!chip->disabled) { /* continue probing */ - if (azx_probe_continue(chip)) - goto error; + azx_probe_continue(chip); } - return; /* OK */ - - error: - snd_card_free(card); - pci_set_drvdata(pci, NULL); } #endif @@ -2177,6 +2168,17 @@ static const struct hdac_io_ops pci_hda_io_ops = { .dma_free_pages = dma_free_pages, }; +/* Blacklist for skipping the whole probe: + * some HD-audio PCI entries are exposed without any codecs, and such devices + * should be ignored from the beginning. + */ +static const struct pci_device_id driver_blacklist[] = { + { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ + { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ + { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ + {} +}; + static const struct hda_controller_ops pci_hda_ops = { .disable_msi_reset_irq = disable_msi_reset_irq, .substream_alloc_pages = substream_alloc_pages, @@ -2196,6 +2198,11 @@ static int azx_probe(struct pci_dev *pci, bool schedule_probe; int err; + if (pci_match_id(driver_blacklist, pci)) { + dev_info(&pci->dev, "Skipping the blacklisted device\n"); + return -ENODEV; + } + if (dev >= SNDRV_CARDS) return -ENODEV; if (!enable[dev]) { diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index 9b7efece4484a65eda479511dd00d3a02abacf00..2a173de7ca023fd35fbf566aa509ab379d09a0cc 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -221,7 +221,7 @@ static ssize_t init_verbs_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->init_verbs.used; i++) { struct hda_verb *v = snd_array_elem(&codec->init_verbs, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "0x%02x 0x%03x 0x%04x\n", v->nid, v->verb, v->param); } @@ -271,7 +271,7 @@ static ssize_t hints_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->hints.used; i++) { struct hda_hint *hint = snd_array_elem(&codec->hints, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%s = %s\n", hint->key, hint->val); } mutex_unlock(&codec->user_mutex); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 382b6d2ed80366e2268e97f5602d2aa4e3759eb4..9cc9304ff21a728da62ffdb21567f050053c302d 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -969,6 +969,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index f214055972150d7875c4e01a41a6c79e663a448e..435c0efb9bf29205c240e3d4f807a94376fc4215 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1849,8 +1849,10 @@ static bool check_non_pcm_per_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) /* Add sanity check to pass klockwork check. * This should never happen. 
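The snprintf()-to-scnprintf() conversions that recur through this series (hdmi_chmap, hda_codec, hda_eld and hda_sysfs above, skl-debug and soc-pcm later) all fix the same pitfall: snprintf() returns the length the string would have had, so accumulating its return value after a truncated write pushes the offset past the end of the buffer, while scnprintf() returns the number of bytes actually stored. scnprintf() itself is kernel-only, so this user-space sketch bounds a stand-in the same way:

#include <stdarg.h>
#include <stdio.h>

/* User-space stand-in for the kernel's scnprintf(): never returns more
 * than size - 1, i.e. what was really written into buf. */
static int my_scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int ret;

	if (size == 0)
		return 0;
	va_start(args, fmt);
	ret = vsnprintf(buf, size, fmt, args);
	va_end(args);
	if (ret < 0)
		return 0;
	return (size_t)ret >= size ? (int)(size - 1) : ret;
}

int main(void)
{
	char buf[8];
	int len = 0;

	/* With plain snprintf() the second call could leave len > 8,
	 * making buf + len point past the buffer for any further call. */
	len += my_scnprintf(buf + len, sizeof(buf) - len, "%s", "hello ");
	len += my_scnprintf(buf + len, sizeof(buf) - len, "%s", "world");
	printf("wrote %d bytes: \"%s\"\n", len, buf);   /* 7, "hello w" */
	return 0;
}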
*/ - if (WARN_ON(spdif == NULL)) + if (WARN_ON(spdif == NULL)) { + mutex_unlock(&codec->spdif_mutex); return true; + } non_pcm = !!(spdif->status & IEC958_AES0_NONAUDIO); mutex_unlock(&codec->spdif_mutex); return non_pcm; @@ -2574,9 +2576,12 @@ static int alloc_intel_hdmi(struct hda_codec *codec) /* parse and post-process for Intel codecs */ static int parse_intel_hdmi(struct hda_codec *codec) { - int err; + int err, retries = 3; + + do { + err = hdmi_parse_codec(codec); + } while (err < 0 && retries--); - err = hdmi_parse_codec(codec); if (err < 0) { generic_spec_free(codec); return err; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5a7afbeb612d59037e5fe320d1b84122f2ba13c3..b2aec97414fb86c42a45ce924aa96cf8f8c16317 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -333,7 +333,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0233: case 0x10ec0235: + case 0x10ec0236: + case 0x10ec0245: case 0x10ec0255: + case 0x10ec0256: case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: @@ -345,11 +348,6 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; - case 0x10ec0236: - case 0x10ec0256: - alc_write_coef_idx(codec, 0x36, 0x5757); - alc_update_coef_idx(codec, 0x10, 1<<9, 0); - break; case 0x10ec0275: alc_update_coef_idx(codec, 0xe, 0, 1<<0); break; @@ -3122,7 +3120,13 @@ static void alc256_init(struct hda_codec *codec) alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */ alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 1 << 15); /* Clear bit */ alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 0 << 15); - alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/ + /* + * Expose headphone mic (or possibly Line In on some machines) instead + * of PC Beep on 1Ah, and disable 1Ah loopback for all outputs. See + * Documentation/sound/hd-audio/realtek-pc-beep.rst for details of + * this register. 
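alc_update_coef_idx() as used above is a masked read-modify-write on an indexed coefficient register, the same idiom the snd_miro/opti9xx write_mask macros earlier in this series were converted into inline functions for: clear the bits named by the mask, then merge in the new value. A sketch of the idiom over a hypothetical register file (the storage and accessor names are invented for the example):

#include <stdint.h>
#include <stdio.h>

static uint16_t coef_regs[0x80];  /* fake indexed coefficient space */

static uint16_t read_coef(unsigned int idx)
{
	return coef_regs[idx];
}

static void write_coef(unsigned int idx, uint16_t val)
{
	coef_regs[idx] = val;
}

/* update_coef(idx, mask, value): only the bits in mask change. */
static void update_coef(unsigned int idx, uint16_t mask, uint16_t value)
{
	write_coef(idx, (read_coef(idx) & ~mask) | (value & mask));
}

int main(void)
{
	coef_regs[0x10] = 0x0a00;
	update_coef(0x10, 1 << 9, 0);  /* clear bit 9, as the EAPD fixup does */
	printf("coef 0x10 = 0x%04x\n", (unsigned)read_coef(0x10));  /* 0x0800 */
	return 0;
}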
+ */ + alc_write_coef_idx(codec, 0x36, 0x5757); } static void alc256_shutup(struct hda_codec *codec) @@ -4687,6 +4691,8 @@ static void alc_determine_headset_type(struct hda_codec *codec) is_ctia = (val & 0x1c02) == 0x1c02; break; case 0x10ec0225: + codec->power_save_node = 1; + /* fall through */ case 0x10ec0295: case 0x10ec0299: alc_process_coef_fw(codec, alc225_pre_hsmode); @@ -6584,6 +6590,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), @@ -7259,6 +7266,7 @@ static int patch_alc269(struct hda_codec *codec) spec->gen.mixer_nid = 0; break; case 0x10ec0215: + case 0x10ec0245: case 0x10ec0285: case 0x10ec0289: spec->codec_variant = ALC269_TYPE_ALC215; @@ -8339,6 +8347,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0234, "ALC234", patch_alc269), HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269), HDA_CODEC_ENTRY(0x10ec0236, "ALC236", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0245, "ALC245", patch_alc269), HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269), HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269), HDA_CODEC_ENTRY(0x10ec0257, "ALC257", patch_alc269), diff --git a/sound/pci/ice1712/prodigy_hifi.c b/sound/pci/ice1712/prodigy_hifi.c index 2697402b5195799243e115fef1996c7293eb773d..41f6450a2539f44734e228a099d843547a1f7280 100644 --- a/sound/pci/ice1712/prodigy_hifi.c +++ b/sound/pci/ice1712/prodigy_hifi.c @@ -569,7 +569,7 @@ static int wm_adc_mux_enum_get(struct snd_kcontrol *kcontrol, struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol); mutex_lock(&ice->gpio_mutex); - ucontrol->value.integer.value[0] = wm_get(ice, WM_ADC_MUX) & 0x1f; + ucontrol->value.enumerated.item[0] = wm_get(ice, WM_ADC_MUX) & 0x1f; mutex_unlock(&ice->gpio_mutex); return 0; } @@ -583,7 +583,7 @@ static int wm_adc_mux_enum_put(struct snd_kcontrol *kcontrol, mutex_lock(&ice->gpio_mutex); oval = wm_get(ice, WM_ADC_MUX); - nval = (oval & 0xe0) | ucontrol->value.integer.value[0]; + nval = (oval & 0xe0) | ucontrol->value.enumerated.item[0]; if (nval != oval) { wm_put(ice, WM_ADC_MUX, nval); change = 1; diff --git a/sound/sh/aica.c b/sound/sh/aica.c index fdc680ae8aa09c037ac01cc6623e6f6d15797993..d9acf551a8985c58de15ad752b050d959bdb8aa7 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -117,10 +117,10 @@ static void spu_memset(u32 toi, u32 what, int length) } /* spu_memload - write to SPU address space */ -static void spu_memload(u32 toi, void *from, int length) +static void spu_memload(u32 toi, const void *from, int length) { unsigned long flags; - u32 *froml = from; + const u32 *froml = from; u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi); int i; u32 val; diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index 834b2574786f50da6e986ff24eeffe8b3d3178db..6251b5e1b64a2b0594ee0e2095456ba5511d2df6 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -190,7 +190,6 @@ static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct 
snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; if (copy_from_user_toio(chip->data_buffer + pos, src, count)) return -EFAULT; @@ -210,7 +209,6 @@ static int snd_sh_dac_pcm_copy_kernel(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memcpy_toio(chip->data_buffer + pos, src, count); chip->buffer_end = chip->data_buffer + pos + count; @@ -229,7 +227,6 @@ static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memset_io(chip->data_buffer + pos, 0, count); chip->buffer_end = chip->data_buffer + pos + count; diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba5132a01bbb49eb985540d16a944ecf..23887613b5c39325a35a8f1e56713926bfbfbacd 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -25,6 +25,8 @@ config SND_ATMEL_SOC_DMA config SND_ATMEL_SOC_SSC_DMA tristate + select SND_ATMEL_SOC_DMA + select SND_ATMEL_SOC_PDC config SND_ATMEL_SOC_SSC tristate diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 1c3626347e12b561549bf8276851f3c6cd399daf..aeeec1144558ef34efacd2fd4047111eae9de115 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -142,14 +142,14 @@ static struct hdac_hdmi_pcm * hdac_hdmi_get_pcm_from_cvt(struct hdac_hdmi_priv *hdmi, struct hdac_hdmi_cvt *cvt) { - struct hdac_hdmi_pcm *pcm = NULL; + struct hdac_hdmi_pcm *pcm; list_for_each_entry(pcm, &hdmi->pcm_list, head) { if (pcm->cvt == cvt) - break; + return pcm; } - return pcm; + return NULL; } static void hdac_hdmi_jack_report(struct hdac_hdmi_pcm *pcm, diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 68feae262476492ae3a0b30855dbb09e96fa187e..940bdc30753d124625a4bee4621a9461efc0f523 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1438,13 +1438,15 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) } pcm512x->sclk = devm_clk_get(dev, NULL); - if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err; + } if (!IS_ERR(pcm512x->sclk)) { ret = clk_prepare_enable(pcm512x->sclk); if (ret != 0) { dev_err(dev, "Failed to enable SCLK: %d\n", ret); - return ret; + goto err; } } diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index ca8a70ab22a8253f20bf904600eaf03fdeea2f91..d64cb28e8dc5cc8425d992ff8e462ded9bd49bc5 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1563,6 +1563,40 @@ static int sgtl5000_i2c_probe(struct i2c_client *client, dev_err(&client->dev, "Error %d initializing CHIP_CLK_CTRL\n", ret); + /* Mute everything to avoid pop from the following power-up */ + ret = regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_CTRL, + SGTL5000_CHIP_ANA_CTRL_DEFAULT); + if (ret) { + dev_err(&client->dev, + "Error %d muting outputs via CHIP_ANA_CTRL\n", ret); + goto disable_clk; + } + + /* + * If VAG is powered-on (e.g. from previous boot), it would be disabled + * by the write to ANA_POWER in later steps of the probe code. This + * may create a loud pop even with all outputs muted. 
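The hdac_hdmi change above fixes a classic list_for_each_entry() trap: when the loop terminates without a break, the cursor is not NULL, it points at the container of the list head, so the old code could hand back a bogus entry when no PCM matched. The fix returns the match from inside the walk and NULL after it; the same shape over a plain singly linked list (types invented for the example):

#include <stdio.h>

struct pcm {
	int cvt_id;
	struct pcm *next;
};

/* Return the match from inside the loop, NULL after it. */
static struct pcm *get_pcm_from_cvt(struct pcm *head, int cvt_id)
{
	struct pcm *p;

	for (p = head; p; p = p->next)
		if (p->cvt_id == cvt_id)
			return p;
	return NULL;
}

int main(void)
{
	struct pcm b = { 2, NULL };
	struct pcm a = { 1, &b };

	printf("cvt 2: %s\n", get_pcm_from_cvt(&a, 2) ? "found" : "missing");
	printf("cvt 3: %s\n", get_pcm_from_cvt(&a, 3) ? "found" : "missing");
	return 0;
}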
The proper way + * to circumvent this is disabling the bit first and waiting the proper + * cool-down time. + */ + ret = regmap_read(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, &value); + if (ret) { + dev_err(&client->dev, "Failed to read ANA_POWER: %d\n", ret); + goto disable_clk; + } + if (value & SGTL5000_VAG_POWERUP) { + ret = regmap_update_bits(sgtl5000->regmap, + SGTL5000_CHIP_ANA_POWER, + SGTL5000_VAG_POWERUP, + 0); + if (ret) { + dev_err(&client->dev, "Error %d disabling VAG\n", ret); + goto disable_clk; + } + + msleep(SGTL5000_VAG_POWERDOWN_DELAY); + } + /* Follow section 2.2.1.1 of AN3663 */ ana_pwr = SGTL5000_ANA_POWER_DEFAULT; if (sgtl5000->num_supplies <= VDDD) { diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 22f3442af9826ce5868619bf993123bafdb0c82f..9ea41749d0375a20d17991d063303e170938de3c 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -236,6 +236,7 @@ /* * SGTL5000_CHIP_ANA_CTRL */ +#define SGTL5000_CHIP_ANA_CTRL_DEFAULT 0x0133 #define SGTL5000_LINE_OUT_MUTE 0x0100 #define SGTL5000_HP_SEL_MASK 0x0040 #define SGTL5000_HP_SEL_SHIFT 6 diff --git a/sound/soc/intel/atom/sst-atom-controls.c b/sound/soc/intel/atom/sst-atom-controls.c index 0f3604b5594240ed74b6fb1d4661efe36665b147..999eb3ba78672c86fc554db821bc8f07170bac3e 100644 --- a/sound/soc/intel/atom/sst-atom-controls.c +++ b/sound/soc/intel/atom/sst-atom-controls.c @@ -974,7 +974,9 @@ static int sst_set_be_modules(struct snd_soc_dapm_widget *w, dev_dbg(c->dev, "Enter: widget=%s\n", w->name); if (SND_SOC_DAPM_EVENT_ON(event)) { + mutex_lock(&drv->lock); ret = sst_send_slot_map(drv); + mutex_unlock(&drv->lock); if (ret) return ret; ret = sst_send_pipe_module_params(w, k); @@ -1341,7 +1343,7 @@ int sst_send_pipe_gains(struct snd_soc_dai *dai, int stream, int mute) dai->capture_widget->name); w = dai->capture_widget; snd_soc_dapm_widget_for_each_source_path(w, p) { - if (p->connected && !p->connected(w, p->sink)) + if (p->connected && !p->connected(w, p->source)) continue; if (p->connect && p->source->power && diff --git a/sound/soc/intel/atom/sst/sst_pci.c b/sound/soc/intel/atom/sst/sst_pci.c index 6906ee624cf6f4e16caa6456fdad8e80ed03cd4e..438c7bcd8c4cda40616566f6979dd402d44ab5c6 100644 --- a/sound/soc/intel/atom/sst/sst_pci.c +++ b/sound/soc/intel/atom/sst/sst_pci.c @@ -107,7 +107,7 @@ static int sst_platform_get_resources(struct intel_sst_drv *ctx) dev_dbg(ctx->dev, "DRAM Ptr %p\n", ctx->dram); do_release_regions: pci_release_regions(pci); - return 0; + return ret; } /* diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 1987f78ea91e49f14188827f081d347662bc4fb1..71c6bbf37b6c8e72e56da771ad0598e4925ef839 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -42,8 +42,8 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, int i; ssize_t ret = 0; - for (i = 0; i < max_pin; i++) - ret += snprintf(buf + size, MOD_BUF - size, + for (i = 0; i < max_pin; i++) { + ret += scnprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" "\tState %d\n\tIndex %d\n", @@ -53,13 +53,15 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, m_pin[i].in_use ? "Used" : "Unused", m_pin[i].is_dynamic ? 
"Dynamic" : "Static", m_pin[i].pin_state, i); + size += ret; + } return ret; } static ssize_t skl_print_fmt(struct skl_module_fmt *fmt, char *buf, ssize_t size, bool direction) { - return snprintf(buf + size, MOD_BUF - size, + return scnprintf(buf + size, MOD_BUF - size, "%s\n\tCh %d\n\tFreq %d\n\tBit depth %d\n\t" "Valid bit depth %d\n\tCh config %#x\n\tInterleaving %d\n\t" "Sample Type %d\n\tCh Map %#x\n", @@ -81,16 +83,16 @@ static ssize_t module_read(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" + ret = scnprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" "\tInstance id %d\n\tPvt_id %d\n", mconfig->guid, mconfig->id.module_id, mconfig->id.instance_id, mconfig->id.pvt_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Resources:\n\tMCPS %#x\n\tIBS %#x\n\tOBS %#x\t\n", mconfig->mcps, mconfig->ibs, mconfig->obs); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module data:\n\tCore %d\n\tIn queue %d\n\t" "Out queue %d\n\tType %s\n", mconfig->core_id, mconfig->max_in_queue, @@ -100,38 +102,38 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_fmt(mconfig->in_fmt, buf, ret, true); ret += skl_print_fmt(mconfig->out_fmt, buf, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Fixup:\n\tParams %#x\n\tConverter %#x\n", mconfig->params_fixup, mconfig->converter); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module Gateway:\n\tType %#x\n\tVbus %#x\n\tHW conn %#x\n\tSlot %#x\n", mconfig->dev_type, mconfig->vbus_id, mconfig->hw_conn_type, mconfig->time_slot); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Pipeline:\n\tID %d\n\tPriority %d\n\tConn Type %d\n\t" "Pages %#x\n", mconfig->pipe->ppl_id, mconfig->pipe->pipe_priority, mconfig->pipe->conn_type, mconfig->pipe->memory_pages); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tParams:\n\t\tHost DMA %d\n\t\tLink DMA %d\n", mconfig->pipe->p_params->host_dma_id, mconfig->pipe->p_params->link_dma_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tPCM params:\n\t\tCh %d\n\t\tFreq %d\n\t\tFormat %d\n", mconfig->pipe->p_params->ch, mconfig->pipe->p_params->s_freq, mconfig->pipe->p_params->s_fmt); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tLink %#x\n\tStream %#x\n", mconfig->pipe->p_params->linktype, mconfig->pipe->p_params->stream); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tState %d\n\tPassthru %s\n", mconfig->pipe->state, mconfig->pipe->passthru ? 
"true" : "false"); @@ -141,7 +143,7 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_pins(mconfig->m_out_pin, buf, mconfig->max_out_queue, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Other:\n\tDomain %d\n\tHomogenous Input %s\n\t" "Homogenous Output %s\n\tIn Queue Mask %d\n\t" "Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t" @@ -199,7 +201,7 @@ static ssize_t fw_softreg_read(struct file *file, char __user *user_buf, __ioread32_copy(d->fw_read_buff, fw_reg_addr, w0_stat_sz >> 2); for (offset = 0; offset < FW_REG_SIZE; offset += 16) { - ret += snprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); + ret += scnprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); hex_dump_to_buffer(d->fw_read_buff + offset, 16, 16, 4, tmp + ret, FW_REG_BUF - ret, 0); ret += strlen(tmp + ret); diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c index 99394c03699819343d351a932ec84799001e7d2d..e099c0505b765210c7f1ff02380dff4dc513da33 100644 --- a/sound/soc/jz4740/jz4740-i2s.c +++ b/sound/soc/jz4740/jz4740-i2s.c @@ -92,7 +92,7 @@ #define JZ_AIC_I2S_STATUS_BUSY BIT(2) #define JZ_AIC_CLK_DIV_MASK 0xf -#define I2SDIV_DV_SHIFT 8 +#define I2SDIV_DV_SHIFT 0 #define I2SDIV_DV_MASK (0xf << I2SDIV_DV_SHIFT) #define I2SDIV_IDV_SHIFT 8 #define I2SDIV_IDV_MASK (0xf << I2SDIV_IDV_SHIFT) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4d948757d300d04be3bfca3c63f78c68085a8506..5e5ed547547397b609fa10693dda891ca92858b2 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -172,7 +172,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, i; for_each_rsnd_mod_array(i, pos, io, rsnd_ssi_array) { - shift = (i * 4) + 16; + shift = (i * 4) + 20; val = (val & ~(0xF << shift)) | rsnd_mod_id(pos) << shift; } diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 420dbbe5d2e22a3faab4c6b888c53e5128ae481d..ec1aa2bbc297b8d734069b026fd8bbd593d32613 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -415,7 +415,7 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, memset(&template, 0, sizeof(template)); template.reg = e->reg; - template.mask = e->mask << e->shift_l; + template.mask = e->mask; template.shift = e->shift_l; template.off_val = snd_soc_enum_item_to_val(e, 0); template.on_val = template.off_val; @@ -541,8 +541,22 @@ static bool dapm_kcontrol_set_value(const struct snd_kcontrol *kcontrol, if (data->value == value) return false; - if (data->widget) - data->widget->on_val = value; + if (data->widget) { + switch (dapm_kcontrol_get_wlist(kcontrol)->widgets[0]->id) { + case snd_soc_dapm_switch: + case snd_soc_dapm_mixer: + case snd_soc_dapm_mixer_named_ctl: + data->widget->on_val = value & data->widget->mask; + break; + case snd_soc_dapm_demux: + case snd_soc_dapm_mux: + data->widget->on_val = value >> data->widget->shift; + break; + default: + data->widget->on_val = value; + break; + } + } data->value = value; @@ -801,7 +815,13 @@ static void dapm_set_mixer_path_status(struct snd_soc_dapm_path *p, int i, val = max - val; p->connect = !!val; } else { - p->connect = 0; + /* since a virtual mixer has no backing registers to + * decide which path to connect, it will try to match + * with initial state. This is to ensure + * that the default mixer choice will be + * correctly powered up during initialization. 
+ */ + p->connect = invert; } } @@ -4500,7 +4520,7 @@ static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm) continue; if (w->power) { dapm_seq_insert(w, &down_list, false); - w->power = 0; + w->new_power = 0; powerdown = 1; } } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 1e317f5e4f1c1c25d071485da4b0eab31573b56e..54e4807a94b841e447c18c76bf243ab6d6aaaed4 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -838,7 +838,7 @@ int snd_soc_get_xr_sx(struct snd_kcontrol *kcontrol, unsigned int regbase = mc->regbase; unsigned int regcount = mc->regcount; unsigned int regwshift = component->val_bytes * BITS_PER_BYTE; - unsigned int regwmask = (1<<regwshift)-1; + unsigned int regwmask = (1UL<<regwshift)-1; unsigned int invert = mc->invert; unsigned long mask = (1UL<<mc->nbits)-1; long min = mc->min; @@ -887,7 +887,7 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned int regbase = mc->regbase; unsigned int regcount = mc->regcount; unsigned int regwshift = component->val_bytes * BITS_PER_BYTE; - unsigned int regwmask = (1<<regwshift)-1; + unsigned int regwmask = (1UL<<regwshift)-1; unsigned int invert = mc->invert; unsigned long mask = (1UL<<mc->nbits)-1; long max = mc->max; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index d65036b9199f14da2938ededb3f069b8791f82f0..471620a11eeca127cf44c7de831eb8812911899b 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2240,7 +2240,8 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, switch (cmd) { case SNDRV_PCM_TRIGGER_START: if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP)) + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) continue; ret = dpcm_do_trigger(dpcm, be_substream, cmd); @@ -2270,7 +2271,8 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, be->dpcm[stream].state = SND_SOC_DPCM_STATE_START; break; case SNDRV_PCM_TRIGGER_STOP: - if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) + if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED)) continue; if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream)) @@ -3325,16 +3327,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, ssize_t offset = 0; /* FE state */ - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "[%s - %s]\n", fe->dai_link->name, stream ?
"Capture" : "Playback"); - offset += snprintf(buf + offset, size - offset, "State: %s\n", + offset += scnprintf(buf + offset, size - offset, "State: %s\n", dpcm_state_string(fe->dpcm[stream].state)); if ((fe->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (fe->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), @@ -3342,10 +3344,10 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_rate(params)); /* BEs state */ - offset += snprintf(buf + offset, size - offset, "Backends:\n"); + offset += scnprintf(buf + offset, size - offset, "Backends:\n"); if (list_empty(&fe->dpcm[stream].be_clients)) { - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " No active DSP links\n"); goto out; } @@ -3354,16 +3356,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "- %s\n", be->dai_link->name); - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " State: %s\n", dpcm_state_string(be->dpcm[stream].state)); if ((be->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 72301bcad3bddde53465a7b29f06e5ca0ba60813..50aa45525be5a6607986749e83d6a99c9232fde2 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -421,7 +421,7 @@ static int soc_tplg_add_kcontrol(struct soc_tplg *tplg, struct snd_soc_component *comp = tplg->comp; return soc_tplg_add_dcontrol(comp->card->snd_card, - comp->dev, k, NULL, comp, kcontrol); + comp->dev, k, comp->name_prefix, comp, kcontrol); } /* remove a mixer kcontrol */ @@ -1954,7 +1954,9 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg, _pcm = pcm; } else { abi_match = false; - pcm_new_ver(tplg, pcm, &_pcm); + ret = pcm_new_ver(tplg, pcm, &_pcm); + if (ret < 0) + return ret; } /* create the FE DAIs and DAI links */ @@ -2177,8 +2179,11 @@ static int soc_tplg_link_elems_load(struct soc_tplg *tplg, } ret = soc_tplg_link_config(tplg, _link); - if (ret < 0) + if (ret < 0) { + if (!abi_match) + kfree(_link); return ret; + } /* offset by version-specific struct size and * real priv data size @@ -2330,7 +2335,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, { struct snd_soc_tplg_manifest *manifest, *_manifest; bool abi_match; - int err; + int ret = 0; if (tplg->pass != SOC_TPLG_PASS_MANIFEST) return 0; @@ -2343,19 +2348,19 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, _manifest = manifest; } else { abi_match = false; - err = manifest_new_ver(tplg, manifest, &_manifest); - if (err < 0) - return err; + ret = manifest_new_ver(tplg, manifest, &_manifest); + if (ret < 0) + return ret; } /* pass control to component driver for optional further init */ if (tplg->comp && tplg->ops && tplg->ops->manifest) - return tplg->ops->manifest(tplg->comp, _manifest); + ret = tplg->ops->manifest(tplg->comp, _manifest); if 
(!abi_match) /* free the duplicated one */ kfree(_manifest); - return 0; + return ret; } /* validate header magic, size and type */ diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 7a312168f8647acd364e9d0d97c9ef7801f51289..a031f25031b4c84cf0a93c806d01c56a24f5060a 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -71,6 +71,7 @@ #define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12) #define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8) +#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2) #define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4) #define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6) #define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9) @@ -221,7 +222,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL, - BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT), + SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK, value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT); return 0; diff --git a/sound/usb/format.c b/sound/usb/format.c index eaf2615cb05d46d4f5c4e7d3136fd781f7a051ca..2227b4cea3383ff17c01ec1c6182ca90c1723b1a 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -249,6 +249,52 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof return 0; } +/* + * Many Focusrite devices supports a limited set of sampling rates per + * altsetting. Maximum rate is exposed in the last 4 bytes of Format Type + * descriptor which has a non-standard bLength = 10. + */ +static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, + struct audioformat *fp, + unsigned int rate) +{ + struct usb_interface *iface; + struct usb_host_interface *alts; + unsigned char *fmt; + unsigned int max_rate; + + iface = usb_ifnum_to_if(chip->dev, fp->iface); + if (!iface) + return true; + + alts = &iface->altsetting[fp->altset_idx]; + fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, + NULL, UAC_FORMAT_TYPE); + if (!fmt) + return true; + + if (fmt[0] == 10) { /* bLength */ + max_rate = combine_quad(&fmt[6]); + + /* Validate max rate */ + if (max_rate != 48000 && + max_rate != 96000 && + max_rate != 192000 && + max_rate != 384000) { + + usb_audio_info(chip, + "%u:%d : unexpected max rate: %u\n", + fp->iface, fp->altsetting, max_rate); + + return true; + } + + return rate <= max_rate; + } + + return true; +} + /* * Helper function to walk the array of sample rate triplets reported by * the device. 
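The sun8i-codec fix above is a field-width bug of the same family as the jz4740 one: AIF1 DATA_FMT is a two-bit field at bits 3:2, and a mask built with BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT) could never clear the field's upper bit, so some format transitions were impossible. A sketch of why the GENMASK()-based mask matters, with regmap_update_bits() reduced to a plain read-modify-write and a simplified 32-bit GENMASK:

#include <stdint.h>
#include <stdio.h>

/* Simplified 32-bit version of the kernel's GENMASK(). */
#define GENMASK(h, l)	((~0u << (l)) & (~0u >> (31 - (h))))

#define DATA_FMT_SHIFT	2
#define DATA_FMT_MASK	GENMASK(3, 2)	/* two-bit field at bits 3:2 */

static void update_bits(uint32_t *reg, uint32_t mask, uint32_t val)
{
	*reg = (*reg & ~mask) | (val & mask);
}

int main(void)
{
	uint32_t reg = 0x3 << DATA_FMT_SHIFT;	/* field currently 0b11 */

	/* Write format 0b01: with a one-bit mask the field's high bit
	 * would survive, leaving 0b11 behind instead of 0b01. */
	update_bits(&reg, DATA_FMT_MASK, 0x1 << DATA_FMT_SHIFT);
	printf("reg = 0x%02x\n", (unsigned)reg);	/* -> 0x04 */
	return 0;
}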
The problem is that we need to parse whole array first to @@ -285,6 +331,11 @@ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip, } for (rate = min; rate <= max; rate += res) { + /* Filter out invalid rates on Focusrite devices */ + if (USB_ID_VENDOR(chip->usb_id) == 0x1235 && + !focusrite_valid_sample_rate(chip, fp, rate)) + goto skip_rate; + if (fp->rate_table) fp->rate_table[nr_rates] = rate; if (!fp->rate_min || rate < fp->rate_min) @@ -299,6 +350,7 @@ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip, break; } +skip_rate: /* avoid endless loop */ if (res == 0) break; diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index b223de3defc4f4a17059549bf34680acfb161643..bf4eacc53a7d26820fe166e16980f1815d3fbdc5 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -313,7 +313,7 @@ static void line6_data_received(struct urb *urb) line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN); - if (done == 0) + if (done <= 0) break; line6->message_length = done; diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c index 36a610ba342ec190b7b44c4a842fe93251380241..c931d48801ebed6626fdbfd0491d672d29c917b3 100644 --- a/sound/usb/line6/midibuf.c +++ b/sound/usb/line6/midibuf.c @@ -163,7 +163,7 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, int midi_length_prev = midibuf_message_length(this->command_prev); - if (midi_length_prev > 0) { + if (midi_length_prev > 1) { midi_length = midi_length_prev - 1; repeat = 1; } else diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 14f953679f9fca1981361ad62151610203dcb23a..211c3648a45d18d29e363b74cb79b1439798f8f6 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2625,7 +2625,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) if (map->id == state.chip->usb_id) { state.map = map->map; state.selector_map = map->selector_map; - mixer->ignore_ctl_error = map->ignore_ctl_error; + mixer->ignore_ctl_error |= map->ignore_ctl_error; break; } } diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index eaa03acd4686bdd20e19b69502c23ca465977ade..26ce6838e84299d3bbec04b5745f0376d3619b59 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -363,6 +363,14 @@ static const struct usbmix_name_map dell_alc4020_map[] = { { 0 } }; +/* Some mobos shipped with a dummy HD-audio show the invalid GET_MIN/GET_MAX + * response for Input Gain Pad (id=19, control=12). Skip it. 
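In these mixer maps an entry with a NULL name and a non-zero control selector tells the parser to drop that control on that unit, so the asus_rog_map entry defined just below suppresses control 12 (the gain pad) on feature unit 19 rather than renaming it. A rough sketch of scanning such a zero-terminated table; the struct layout is read off the initializers, while the matching rules are an assumption for illustration, not the mixer code verbatim:

#include <stdio.h>

struct name_map {
	int id;             /* unit id */
	const char *name;   /* NULL = drop the control */
	int control;        /* 0 = applies to the whole unit (assumed) */
};

static const struct name_map rog_map[] = {
	{ 19, NULL, 12 },   /* FU, Input Gain Pad */
	{}                  /* zero-filled terminator */
};

static int control_is_mapped_out(const struct name_map *map,
				 int id, int control)
{
	for (; map->id; map++)
		if (map->id == id && !map->name &&
		    (!map->control || map->control == control))
			return 1;
	return 0;
}

int main(void)
{
	printf("unit 19 / ctl 12: %d\n", control_is_mapped_out(rog_map, 19, 12));
	printf("unit 19 / ctl  2: %d\n", control_is_mapped_out(rog_map, 19, 2));
	return 0;
}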
+ */ +static const struct usbmix_name_map asus_rog_map[] = { + { 19, NULL, 12 }, /* FU, Input Gain Pad */ + {} +}; + /* * Control map entries */ @@ -482,6 +490,26 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x1020), .map = bose_companion5_map, }, + { /* Gigabyte TRX40 Aorus Pro WiFi */ + .id = USB_ID(0x0414, 0xa002), + .map = asus_rog_map, + }, + { /* ASUS ROG Zenith II */ + .id = USB_ID(0x0b05, 0x1916), + .map = asus_rog_map, + }, + { /* ASUS ROG Strix */ + .id = USB_ID(0x0b05, 0x1917), + .map = asus_rog_map, + }, + { /* MSI TRX40 Creator */ + .id = USB_ID(0x0db0, 0x0d64), + .map = asus_rog_map, + }, + { /* MSI TRX40 */ + .id = USB_ID(0x0db0, 0x543d), + .map = asus_rog_map, + }, { 0 } /* terminator */ }; diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index b54f7dab8372e7f0071b3aa83218cd05df1534f2..b9ea4a42aee4e3cd62cd9a07f913d804c1e80380 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1520,11 +1520,15 @@ static int snd_microii_spdif_default_get(struct snd_kcontrol *kcontrol, /* use known values for that card: interface#1 altsetting#1 */ iface = usb_ifnum_to_if(chip->dev, 1); - if (!iface || iface->num_altsetting < 2) - return -EINVAL; + if (!iface || iface->num_altsetting < 2) { + err = -EINVAL; + goto end; + } alts = &iface->altsetting[1]; - if (get_iface_desc(alts)->bNumEndpoints < 1) - return -EINVAL; + if (get_iface_desc(alts)->bNumEndpoints < 1) { + err = -EINVAL; + goto end; + } ep = get_endpoint(alts, 0)->bEndpointAddress; err = snd_usb_ctl_msg(chip->dev, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 51ee7910e98cf141f4299bedc44142e02c79759b..4872c27f6054398ad06360afef78b675257adbd3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1151,6 +1151,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ return true; } return false; diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index f4b3cda412fcc6d887b997d9e4cf8795900daffd..e75271e731b2d4ca15094e35e72720ca2d0654f6 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -131,7 +131,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw, info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code if (us428->chip_status & USX2Y_STAT_CHIP_INIT) info->chip_ready = 1; - info->version = USX2Y_DRIVER_VERSION; + info->version = USX2Y_DRIVER_VERSION; return 0; } diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index f93b355756e6027e694257c0e2ab829767c7be98..2dfc0abf2e37310e6df192cedced4db0e94494e4 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -689,6 +689,8 @@ static int usX2Y_rate_set(struct usX2Ydev *usX2Y, int rate) us->submitted = 2*NOOF_SETRATE_URBS; for (i = 0; i < NOOF_SETRATE_URBS; ++i) { struct urb *urb = us->urb[i]; + if (!urb) + continue; if (urb->status) { if (!err) err = -ENODEV; diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 8cb504d3038447fe4cbb2f033d6002e8b714b252..5ef1c15e88ad2e3d4d4483a6a31c88770b939810 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -136,7 +136,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, msg.g.version = 0x1; na = (struct nlattr *) GENLMSG_DATA(&msg); na->nla_type = nla_type; - 
na->nla_len = nla_len + 1 + NLA_HDRLEN; + na->nla_len = nla_len + NLA_HDRLEN; memcpy(NLA_DATA(na), nla_data, nla_len); msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 6a73c06e069c9d0eabe52899138a14a06d1a53be..3dbf7e8b07a59887143d66f02812b2bc95f54092 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -35,7 +35,7 @@ $(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h prepare: $(OUTPUT)include/linux/gpio.h -GPIO_UTILS_IN := $(output)gpio-utils-in.o +GPIO_UTILS_IN := $(OUTPUT)gpio-utils-in.o $(GPIO_UTILS_IN): prepare FORCE $(Q)$(MAKE) $(build)=gpio-utils diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index b24afc0e6e81c458c5a2a31c98b5121ea917742c..45b50b89009aafd13a84986c5a6ff6ade86853cc 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs) size_t name_len = strlen(fs->name); /* name + "_PATH" + '\0' */ char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); mem_toupper(upper_name, name_len); strcpy(&upper_name[name_len], "_PATH"); @@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs) return false; fs->found = true; - strncpy(fs->path, override_path, sizeof(fs->path)); + strncpy(fs->path, override_path, sizeof(fs->path) - 1); + fs->path[sizeof(fs->path) - 1] = '\0'; return true; } diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt index 0a0e9112f2842e5b40e4e2284b2c061c4d98370a..5cb9f009f2be3f32c2bd8316d0e5f9805f6b4ea5 100644 --- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt @@ -909,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5422543faff85fc882c42257142903e0c2a37ca7..04fc04b4ab67e03d798ff945a0dbe6d50b92daf0 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -915,10 +915,7 @@ static struct rela *find_switch_table(struct objtool_file *file, * it. */ for (; - &insn->list != &file->insn_list && - insn->sec == func->sec && - insn->offset >= func->offset; - + &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func; insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) @@ -2065,14 +2062,27 @@ static bool ignore_unreachable_insn(struct instruction *insn) !strcmp(insn->sec->name, ".altinstr_aux")) return true; + if (!insn->func) + return false; + + /* + * CONFIG_UBSAN_TRAP inserts a UD2 when it sees + * __builtin_unreachable(). The BUG() macro has an unreachable() after + * the UD2, which causes GCC's undefined trap logic to emit another UD2 + * (or occasionally a JMP to UD2). + */ + if (list_prev_entry(insn, list)->dead_end && + (insn->type == INSN_BUG || + (insn->type == INSN_JUMP_UNCONDITIONAL && + insn->jump_dest && insn->jump_dest->type == INSN_BUG))) + return true; + /* * Check if this (or a subsequent) instruction is related to * CONFIG_UBSAN or CONFIG_KASAN. * * End the search at 5 instructions to avoid going into the weeds. 
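The fs.c change above (and the turbostat one later in this series) applies the standard strncpy() rule: strncpy(dst, src, sizeof(dst)) leaves dst without a terminating NUL whenever src is at least as long as the buffer, so copy sizeof(dst) - 1 bytes and terminate explicitly. A short demonstration:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char path[8];
	const char *src = "0123456789";   /* longer than the destination */

	/* strncpy() would fill all 8 bytes with no NUL if given
	 * sizeof(path); bound it one short and terminate by hand. */
	strncpy(path, src, sizeof(path) - 1);
	path[sizeof(path) - 1] = '\0';

	printf("%s\n", path);   /* "0123456" */
	return 0;
}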
*/ - if (!insn->func) - return false; for (i = 0; i < 5; i++) { if (is_kasan_insn(insn) || is_ubsan_insn(insn)) diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index c3343820916a6dccf0e49bf50cbfbc811b4525fd..7cbbbdd932f1dfb8beaa22fc2f09986b6a5e2291 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -78,7 +78,7 @@ int orc_dump(const char *_objname) char *name; size_t nr_sections; Elf64_Addr orc_ip_addr = 0; - size_t shstrtab_idx; + size_t shstrtab_idx, strtab_idx = 0; Elf *elf; Elf_Scn *scn; GElf_Shdr sh; @@ -139,6 +139,8 @@ int orc_dump(const char *_objname) if (!strcmp(name, ".symtab")) { symtab = data; + } else if (!strcmp(name, ".strtab")) { + strtab_idx = i; } else if (!strcmp(name, ".orc_unwind")) { orc = data->d_buf; orc_size = sh.sh_size; @@ -150,7 +152,7 @@ int orc_dump(const char *_objname) } } - if (!symtab || !orc || !orc_ip) + if (!symtab || !strtab_idx || !orc || !orc_ip) return 0; if (orc_size % sizeof(*orc) != 0) { @@ -171,21 +173,29 @@ int orc_dump(const char *_objname) return -1; } - scn = elf_getscn(elf, sym.st_shndx); - if (!scn) { - WARN_ELF("elf_getscn"); - return -1; - } - - if (!gelf_getshdr(scn, &sh)) { - WARN_ELF("gelf_getshdr"); - return -1; - } - - name = elf_strptr(elf, shstrtab_idx, sh.sh_name); - if (!name || !*name) { - WARN_ELF("elf_strptr"); - return -1; + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) { + scn = elf_getscn(elf, sym.st_shndx); + if (!scn) { + WARN_ELF("elf_getscn"); + return -1; + } + + if (!gelf_getshdr(scn, &sh)) { + WARN_ELF("gelf_getshdr"); + return -1; + } + + name = elf_strptr(elf, shstrtab_idx, sh.sh_name); + if (!name) { + WARN_ELF("elf_strptr"); + return -1; + } + } else { + name = elf_strptr(elf, strtab_idx, sym.st_name); + if (!name) { + WARN_ELF("elf_strptr"); + return -1; + } } printf("%s+%llx:", name, (unsigned long long)rela.r_addend); diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7902a5681fc89f6eda64234de379fc8122567529..b8fc7d972be9d1ae667b8bd3de4f389e427a1ab8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -35,7 +35,7 @@ endif # Only pass canonical directory names as the output directory: # ifneq ($(O),) - FULL_O := $(shell readlink -f $(O) || echo $(O)) + FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O)) endif # diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b97e31498ff769734e50f9528ade05475687b365..8baaf9797101c34e862e21398178104e81d1d4d9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -179,8 +179,17 @@ strip-libs = $(filter-out -l%,$(1)) PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +# Python 3.8 changed the output of `python-config --ldflags` to not include the +# '-lpythonX.Y' flag unless '--embed' is also passed. 
The feature check for +# libpython fails if that flag is not included in LDFLAGS +ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0) + PYTHON_CONFIG_LDFLAGS := --ldflags --embed +else + PYTHON_CONFIG_LDFLAGS := --ldflags +endif + ifdef PYTHON_CONFIG - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) + PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 628ad5f7eddb6a4295ba9370f987e9194dfe6bed..49a87fb64156ef63e265bf8a4cf501d42cd469c5 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3142,6 +3142,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } + actions->ms.map = map; top = pstack__peek(browser->pstack); if (top == &browser->hists->dso_filter) { /* diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 63db9872c8808df8df2c87bef8d6d5cae925859e..a49f27aa0c95a5b69c90436ebec90ecade5171ee 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -90,7 +90,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return true; } - if (!strncmp(filename, "/system/lib/", 11)) { + if (!strncmp(filename, "/system/lib/", 12)) { char *ndk, *app; const char *arch; size_t ndk_length; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 893193bd28c1788c8b41bb6cdd7010a3b2751e44..ae0feea4e8b5f9bf9d6a5bb116f7312f436e8e1c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -626,14 +626,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, return -EINVAL; } - /* Try to get actual symbol name from symtab */ - symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + if (dwarf_entrypc(sp_die, &eaddr) == 0) { + /* If the DIE has entrypc, use it. 
*/ + symbol = dwarf_diename(sp_die); + } else { + /* Try to get actual symbol name and address from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + eaddr = sym.st_value; + } if (!symbol) { pr_warning("Failed to find symbol at 0x%lx\n", (unsigned long)paddr); return -ENOENT; } - eaddr = sym.st_value; tp->offset = (unsigned long)(paddr - eaddr); tp->address = (unsigned long)paddr; diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 2116df9ad83254eb0a7a37246b83f959e669b7f5..c097a3748674f80fd2bfc3ad94bce044cadc0931 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -83,7 +83,7 @@ static struct pci_access *pci_acc; static struct pci_dev *amd_fam14h_pci_dev; static int nbp1_entered; -struct timespec start_time; +static struct timespec start_time; static unsigned long long timediff; #ifdef DEBUG diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 5b3205f1621749bb6ebc340413ae16d957064cb9..5277df27191f3dad92efbfc471a75654e6fccedc 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -21,7 +21,7 @@ struct cpuidle_monitor cpuidle_sysfs_monitor; static unsigned long long **previous_count; static unsigned long long **current_count; -struct timespec start_time; +static struct timespec start_time; static unsigned long long timediff; static int cpuidle_get_count_percent(unsigned int id, double *percent, diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 05f953f0f0a0cf2b3d479ce6a6dcbc9c0d9a8c28..80a21cb67d94f4cd418834c3bc2274ddb6a3af10 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -29,6 +29,8 @@ struct cpuidle_monitor *all_monitors[] = { 0 }; +int cpu_count; + static struct cpuidle_monitor *monitors[MONITORS_MAX]; static unsigned int avail_monitors; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 9e43f3371fbc625eaabaaeb941ae3a14a732ff7f..3558bbae2b5dcf17d78d7e57bc011d5d3751f3b8 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -18,7 +18,7 @@ #define CSTATE_NAME_LEN 5 #define CSTATE_DESC_LEN 60 -int cpu_count; +extern int cpu_count; /* Hard to define the right names ...: */ enum power_range_e { diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 19e345cf8193e59a4ccc7b0ce8463460d41a6e28..0692f2efc25eff688808d8173501443f1ec7d946 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4650,9 +4650,9 @@ int add_counter(unsigned int msr_num, char *path, char *name, } msrp->msr_num = msr_num; - strncpy(msrp->name, name, NAME_BYTES); + strncpy(msrp->name, name, NAME_BYTES - 1); if (path) - strncpy(msrp->path, path, PATH_BYTES); + strncpy(msrp->path, path, PATH_BYTES - 1); msrp->width = width; msrp->type = type; msrp->format = format; diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 71dc7efc7efa4fc401db7c47de68b00d9aeb0412..df247f39d7c593f61289f7d95a97b180d0474e21 100644 --- 
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 ifneq ($(O),)
 ifeq ($(origin O), command line)
-	dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
-	ABSOLUTE_O := $(shell cd $(O) ; pwd)
+	dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
+	ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd)
 	OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
 	COMMAND_O := O=$(ABSOLUTE_O)
 ifeq ($(objtree),)
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 0c8b61f8398edace8b4d7be42e50b8638679520d..3bdd6a463819d5e55adc21e7b264d018d9cc0270 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1345,7 +1345,7 @@ sub reboot {
 
     } else {
 	# Make sure everything has been written to disk
-	run_ssh("sync");
+	run_ssh("sync", 10);
 
 	if (defined($time)) {
 	    start_monitor;
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index c5587844fbb8c6e2e5254ea050f6167c834186ad..ad723a5d0f831779d5db43625169935627b30486 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -137,7 +137,7 @@ int dump_queue(struct msgque_data *msgque)
 	for (kern_id = 0; kern_id < 256; kern_id++) {
 		ret = msgctl(kern_id, MSG_STAT, &ds);
 		if (ret < 0) {
-			if (errno == -EINVAL)
+			if (errno == EINVAL)
 				continue;
 			printf("Failed to get stats for IPC queue with id %d\n",
 					kern_id);
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 7956ea3be6675ff865e6671a2b47eba87fc30b06..eed5d5b81226b2ccee702ac4298dfb702f363d81 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -502,18 +502,23 @@ function test_num()
 	fi
 }
 
-function get_test_count()
+function get_test_data()
 {
 	test_num $1
-	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	local field_num=$(echo $1 | sed 's/^0*//')
+	echo $ALL_TESTS | awk '{print $'$field_num'}'
+}
+
+function get_test_count()
+{
+	TEST_DATA=$(get_test_data $1)
 	LAST_TWO=${TEST_DATA#*:*}
 	echo ${LAST_TWO%:*}
 }
 
 function get_test_enabled()
 {
-	test_num $1
-	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	TEST_DATA=$(get_test_data $1)
 	echo ${TEST_DATA#*:*:}
 }
 
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 5bef05d6ba3935c4c288faec8c8efc83ed4a3849..c9be64dc681d2fe6289a0658f64e1f77aeac7aa2 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -54,17 +54,20 @@ else
 	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
 endif
 
+define INSTALL_SINGLE_RULE
+	$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
+	$(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+	$(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+endef
+
 define INSTALL_RULE
-	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then	\
-		mkdir -p ${INSTALL_PATH};	\
-		echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";	\
-		rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;	\
-	fi
-	@if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then	\
-		mkdir -p ${INSTALL_PATH};	\
-		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
-		rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;	\
-	fi
+	$(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
 endef
 
 install: all
diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c
index d4b59ab979a0939f71684c4fcbfa09d8be7c907e..f55943b6d1e2ab9796c844438e2e964c8bd6724a 100644
--- a/tools/testing/selftests/size/get_size.c
+++ b/tools/testing/selftests/size/get_size.c
@@ -12,23 +12,35 @@
 * own execution. It also attempts to have as few dependencies
 * on kernel features as possible.
 *
- * It should be statically linked, with startup libs avoided.
- * It uses no library calls, and only the following 3 syscalls:
+ * It should be statically linked, with startup libs avoided. It uses
+ * no library calls except the syscall() function for the following 3
+ * syscalls:
 *   sysinfo(), write(), and _exit()
 *
 * For output, it avoids printf (which in some C libraries
 * has large external dependencies) by implementing it's own
 * number output and print routines, and using __builtin_strlen()
+ *
+ * The test may crash if any of the above syscalls fails because in some
+ * libc implementations (e.g. the GNU C Library) errno is saved in
+ * thread-local storage, which does not get initialized due to avoiding
+ * startup libs.
 */
 
 #include <sys/sysinfo.h>
 #include <unistd.h>
+#include <sys/syscall.h>
 
 #define STDOUT_FILENO 1
 
 static int print(const char *s)
 {
-	return write(STDOUT_FILENO, s, __builtin_strlen(s));
+	size_t len = 0;
+
+	while (s[len] != '\0')
+		len++;
+
+	return syscall(SYS_write, STDOUT_FILENO, s, len);
 }
 
 static inline char *num_to_str(unsigned long num, char *buf, int len)
@@ -80,12 +92,12 @@ void _start(void)
 	print("TAP version 13\n");
 	print("# Testing system size.\n");
 
-	ccode = sysinfo(&info);
+	ccode = syscall(SYS_sysinfo, &info);
 	if (ccode < 0) {
 		print("not ok 1");
 		print(test_name);
 		print(" ---\n reason: \"could not get sysinfo\"\n ...\n");
-		_exit(ccode);
+		syscall(SYS_exit, ccode);
 	}
 	print("ok 1");
 	print(test_name);
@@ -101,5 +113,5 @@ void _start(void)
 	print(" ...\n");
 	print("1..1\n");
 
-	_exit(0);
+	syscall(SYS_exit, 0);
 }
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 6f22238f32173db4ef4d710666807cb5397f7dc0..12aaa063196e7406a08188a64570d46ce407ec91 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -414,8 +414,12 @@ int main()
 
 #if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
 	vsyscall32 = (void *)getauxval(AT_SYSINFO);
-	printf("[RUN]\tCheck AT_SYSINFO return regs\n");
-	test_sys32_regs(do_full_vsyscall32);
+	if (vsyscall32) {
+		printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+		test_sys32_regs(do_full_vsyscall32);
+	} else {
+		printf("[SKIP]\tAT_SYSINFO is not available\n");
+	}
 #endif
 
 	test_ptrace_syscall_restart();
diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c
index b4c37e76a6e08f7973bcf324dd87a612d621546d..187dfaa67d0a2d8e9b6f1b804eff87dc0de7b321 100644
--- a/tools/usb/usbip/src/usbip_network.c
+++ b/tools/usb/usbip/src/usbip_network.c
@@ -62,39 +62,39 @@ void usbip_setup_port_number(char *arg)
(\"%s\")", usbip_port, usbip_port_string); } -void usbip_net_pack_uint32_t(int pack, uint32_t *num) +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num) { uint32_t i; if (pack) - i = htonl(*num); + i = htonl(num); else - i = ntohl(*num); + i = ntohl(num); - *num = i; + return i; } -void usbip_net_pack_uint16_t(int pack, uint16_t *num) +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num) { uint16_t i; if (pack) - i = htons(*num); + i = htons(num); else - i = ntohs(*num); + i = ntohs(num); - *num = i; + return i; } void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev) { - usbip_net_pack_uint32_t(pack, &udev->busnum); - usbip_net_pack_uint32_t(pack, &udev->devnum); - usbip_net_pack_uint32_t(pack, &udev->speed); + udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum); + udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum); + udev->speed = usbip_net_pack_uint32_t(pack, udev->speed); - usbip_net_pack_uint16_t(pack, &udev->idVendor); - usbip_net_pack_uint16_t(pack, &udev->idProduct); - usbip_net_pack_uint16_t(pack, &udev->bcdDevice); + udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor); + udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct); + udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice); } void usbip_net_pack_usb_interface(int pack __attribute__((unused)), @@ -141,6 +141,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen) return usbip_net_xmit(sockfd, buff, bufflen, 1); } +static inline void usbip_net_pack_op_common(int pack, + struct op_common *op_common) +{ + op_common->version = usbip_net_pack_uint16_t(pack, op_common->version); + op_common->code = usbip_net_pack_uint16_t(pack, op_common->code); + op_common->status = usbip_net_pack_uint32_t(pack, op_common->status); +} + int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) { struct op_common op_common; @@ -152,7 +160,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) op_common.code = code; op_common.status = status; - PACK_OP_COMMON(1, &op_common); + usbip_net_pack_op_common(1, &op_common); rc = usbip_net_send(sockfd, &op_common, sizeof(op_common)); if (rc < 0) { @@ -176,7 +184,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code) goto err; } - PACK_OP_COMMON(0, &op_common); + usbip_net_pack_op_common(0, &op_common); if (op_common.version != USBIP_VERSION) { dbg("version mismatch: %d %d", op_common.version, diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h index 7032687621d3b503edb38f6ad69926631633c7b4..8e8330c0f1c9c07dd054fe43fbb61af92d734450 100644 --- a/tools/usb/usbip/src/usbip_network.h +++ b/tools/usb/usbip/src/usbip_network.h @@ -34,12 +34,6 @@ struct op_common { } __attribute__((packed)); -#define PACK_OP_COMMON(pack, op_common) do {\ - usbip_net_pack_uint16_t(pack, &(op_common)->version);\ - usbip_net_pack_uint16_t(pack, &(op_common)->code);\ - usbip_net_pack_uint32_t(pack, &(op_common)->status);\ -} while (0) - /* ---------------------------------------------------------------------- */ /* Dummy Code */ #define OP_UNSPEC 0x00 @@ -165,11 +159,11 @@ struct op_devlist_reply_extra { } while (0) #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\ - usbip_net_pack_uint32_t(pack, &(reply)->ndev);\ + (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\ } while (0) -void usbip_net_pack_uint32_t(int pack, uint32_t *num); -void usbip_net_pack_uint16_t(int pack, uint16_t *num); +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num); +uint16_t 
usbip_net_pack_uint16_t(int pack, uint16_t num); void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev); void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf); diff --git a/tools/vm/Makefile b/tools/vm/Makefile index 20f6cf04377f0c257aeb4bacb3133b6a35e9e9b6..9860622cbb151126571ef0962cdab5735c59c5dd 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm tools # +include ../scripts/Makefile.include + TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api diff --git a/usr/Kconfig b/usr/Kconfig index 43658b8a975e57a8d8b98ce0c1fd78e7e810afe3..8b4826de1189f9cdb1f73b412aec078829974138 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -131,17 +131,6 @@ choice If in doubt, select 'None' -config INITRAMFS_COMPRESSION_NONE - bool "None" - help - Do not compress the built-in initramfs at all. This may sound wasteful - in space, but, you should be aware that the built-in initramfs will be - compressed at a later stage anyways along with the rest of the kernel, - on those architectures that support this. However, not compressing the - initramfs may lead to slightly higher memory consumption during a - short time at boot, while both the cpio image and the unpacked - filesystem image will be present in memory simultaneously - config INITRAMFS_COMPRESSION_GZIP bool "Gzip" depends on RD_GZIP @@ -214,6 +203,17 @@ config INITRAMFS_COMPRESSION_LZ4 If you choose this, keep in mind that most distros don't provide lz4 by default which could cause a build failure. +config INITRAMFS_COMPRESSION_NONE + bool "None" + help + Do not compress the built-in initramfs at all. This may sound wasteful + in space, but, you should be aware that the built-in initramfs will be + compressed at a later stage anyways along with the rest of the kernel, + on those architectures that support this. However, not compressing the + initramfs may lead to slightly higher memory consumption during a + short time at boot, while both the cpio image and the unpacked + filesystem image will be present in memory simultaneously + endchoice config INITRAMFS_COMPRESSION diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 745ee09083dd549f967c228d2e5dfa725d2e27e6..71f77ae6c2a6618b498264376f4207b2ce5dc868 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2027,12 +2027,12 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, gpa, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; @@ -2060,12 +2060,12 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + r = __copy_from_user(data, (void __user *)ghc->hva, len); if (r) return -EFAULT;